From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Apr 2009 14:28:37 -0700 Subject: async_tx: rename zero_sum to val 'zero_sum' does not properly describe the operation of generating parity and checking that it validates against an existing buffer. Change the name of the operation to 'val' (for 'validate'). This is in anticipation of the p+q case where it is a requirement to identify the target parity buffers separately from the source buffers, because the target parity buffers will not have corresponding pq coefficients. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97fa..513150d8c25b 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -117,7 +117,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, dma_async_tx_callback cb_fn, void *cb_fn_param); struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, u32 *result, enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2e2aa3df170c..6768727d00d7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -55,8 +55,8 @@ enum dma_transaction_type { DMA_PQ_XOR, DMA_DUAL_XOR, DMA_PQ_UPDATE, - DMA_ZERO_SUM, - DMA_PQ_ZERO_SUM, + DMA_XOR_VAL, + DMA_PQ_VAL, DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, @@ -214,7 +214,7 @@ struct dma_async_tx_descriptor { * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation - * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -243,7 +243,7 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, u32 *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( -- cgit v1.2.3 From 88ba2aa586c874681c072101287e15d40de7e6e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Apr 2009 16:16:18 -0700 Subject: async_tx: kill ASYNC_TX_DEP_ACK flag In support of inter-channel chaining async_tx utilizes an ack flag to gate whether a dependent operation can be chained to another. While the flag is not set the chain can be considered open for appending. Setting the ack flag closes the chain and flags the descriptor for garbage collection. The ASYNC_TX_DEP_ACK flag essentially means "close the chain after adding this dependency". Since each operation can only have one child the api now implicitly sets the ack flag at dependency submission time. 
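For illustration, a minimal before/after sketch of a memcpy->xor chain under the old and new rules (buffer names and the cb_fn/cb_param callback here are placeholders, not identifiers from the tree; the flag usage mirrors the Documentation example updated by this patch):

	/* before: each link passes ASYNC_TX_DEP_ACK to close its parent */
	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
			  0, NULL, NULL, NULL);
	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
		       tx, cb_fn, cb_param);

	/* after: submitting 'tx' as a dependency acks it implicitly */
	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
			  0, NULL, NULL, NULL);
	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK,
		       tx, cb_fn, cb_param);

The only caller-visible change is the dropped flag; the open/closed bookkeeping for the chain moves into async_tx_submit().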
This removes an unnecessary management burden from clients of the api. [ Impact: clean up and enforce one dependency per operation ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 9 ++++----- crypto/async_tx/async_memcpy.c | 2 +- crypto/async_tx/async_memset.c | 2 +- crypto/async_tx/async_tx.c | 4 ++-- crypto/async_tx/async_xor.c | 6 ++---- drivers/md/raid5.c | 25 +++++++++++-------------- include/linux/async_tx.h | 4 +--- 7 files changed, 22 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 4af12180d191..76feda8541dc 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -80,8 +80,8 @@ acknowledged by the application before the offload engine driver is allowed to recycle (or free) the descriptor. A descriptor can be acked by one of the following methods: 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted -2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent - descriptor of a new operation. +2/ submitting an unacknowledged descriptor as a dependency to another + async_tx call will implicitly set the acknowledged state. 3/ calling async_tx_ack() on the descriptor. 3.4 When does the operation execute? @@ -136,10 +136,9 @@ int run_xor_copy_xor(struct page **xor_srcs, tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL); tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, + ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, complete_xor_copy_xor, NULL); async_tx_issue_pending_all(); diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index ddccfb01c416..7117ec6f1b74 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -35,7 +35,7 @@ * @src: src page * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, + * @flags: ASYNC_TX_ACK * @depend_tx: memcpy depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index 5b5eb99bb244..b2f133885b7f 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,7 +35,7 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: memset depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fef..3766bc3d7d89 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -223,7 +223,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, if (flags & ASYNC_TX_ACK) async_tx_ack(tx); - if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + if (depend_tx) async_tx_ack(depend_tx); } EXPORT_SYMBOL_GPL(async_tx_submit); @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(async_tx_submit); /** * async_trigger_callback - schedules the 
callback function to be run after * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: 'callback' requires the completion of this transaction * @cb_fn: function to call after depend_tx completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index e0580b0ea533..3cc5dc763b54 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -105,7 +105,6 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, _cb_param); depend_tx = tx; - flags |= ASYNC_TX_DEP_ACK; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -168,8 +167,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, - * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine @@ -230,7 +228,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8d2d35ed298..0ef5362c8d02 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -525,14 +525,12 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + b_offset, clen, 0, + tx, NULL, NULL); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + page_offset, clen, 0, + tx, NULL, NULL); } if (clen < len) /* hit end of page */ break; @@ -615,8 +613,7 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_biofill, sh); + async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); } static void ops_complete_compute5(void *stripe_head_ref) @@ -701,8 +698,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) } tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh); return tx; } @@ -809,7 +806,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * for the synchronous xor case */ - flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + flags = ASYNC_TX_ACK | (prexor ? 
ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); @@ -858,7 +855,7 @@ static void ops_run_check(struct stripe_head *sh) &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, + tx = async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_check, sh); } @@ -2687,8 +2684,8 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, /* place all the copies on one channel */ tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + sh->dev[i].page, 0, 0, STRIPE_SIZE, + 0, tx, NULL, NULL); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 513150d8c25b..9f14cd540cd2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,13 +58,11 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), }; #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From a08abd8ca890a377521d65d493d174bebcaf694b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 11:43:59 -0700 Subject: async_tx: structify submission arguments, add scribble Prepare the api for the arrival of a new parameter, 'scribble'. This will allow callers to identify scratchpad memory for dma address or page address conversions. As this adds yet another parameter, take this opportunity to convert the common submission parameters (flags, dependency, callback, and callback argument) into an object that is passed by reference. Also, take this opportunity to fix up the kerneldoc and add notes about the relevant ASYNC_TX_* flags for each routine. [ Impact: moves api pass-by-value parameters to a pass-by-reference struct ] Signed-off-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 6 +- crypto/async_tx/async_memcpy.c | 26 +++---- crypto/async_tx/async_memset.c | 25 +++---- crypto/async_tx/async_tx.c | 51 +++++++------- crypto/async_tx/async_xor.c | 123 +++++++++++++++++----------------- drivers/md/raid5.c | 59 +++++++++------- include/linux/async_tx.h | 84 ++++++++++++++--------- 7 files changed, 200 insertions(+), 174 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 76feda8541dc..dfe0475f7919 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -54,11 +54,7 @@ features surfaced as a result: 3.1 General format of the API: struct dma_async_tx_descriptor * -async_<operation>(<op specific parameters>, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *dependency, - dma_async_tx_callback callback_routine, - void *callback_parameter); +async_<operation>(<op specific parameters>, struct async_submit_ctl *submit) 3.2 Supported operations: memcpy - memory copy between a source and a destination buffer diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 7117ec6f1b74..89e05556f3df 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -33,28 +33,28 @@ * async_memcpy - attempt to copy memory with a dma engine. 
* @dest: destination page * @src: src page - * @offset: offset in pages to start transaction + * @dest_offset: offset into 'dest' to start transaction + * @src_offset: offset into 'src' to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memcpy depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); @@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset; @@ -83,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(src_buf, KM_USER1); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index b2f133885b7f..c14437238f4c 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,26 +35,23 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memset depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_memset(struct page *dest, int val, unsigned int offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, &dest, 1, NULL, 0, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? 
DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); @@ -64,19 +61,19 @@ async_memset(struct page *dest, int val, unsigned int offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { /* run the memset synchronously */ void *dest_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); - dest_buf = (void *) (((char *) page_address(dest)) + offset); + dest_buf = page_address(dest) + offset; /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); memset(dest_buf, val, len); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 3766bc3d7d89..802a5ce437d9 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -45,13 +45,15 @@ static void __exit async_tx_exit(void) /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously - * @depend_tx: transaction dependency + * @submit: transaction dependency and submission modifiers * @tx_type: transaction type */ struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type) +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type) { + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + /* see if we can keep the chain on one channel */ if (depend_tx && dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) @@ -144,13 +146,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, /** - * submit_disposition - while holding depend_tx->lock we must avoid submitting - * new operations to prevent a circular locking dependency with - * drivers that already hold a channel lock when calling - * async_tx_run_dependencies. + * submit_disposition - flags for routing an incoming operation * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly + * + * while holding depend_tx->lock we must avoid submitting new operations + * to prevent a circular locking dependency with drivers that already + * hold a channel lock when calling async_tx_run_dependencies. 
*/ enum submit_disposition { ASYNC_TX_SUBMITTED, @@ -160,11 +163,12 @@ enum submit_disposition { void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { - tx->callback = cb_fn; - tx->callback_param = cb_param; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + + tx->callback = submit->cb_fn; + tx->callback_param = submit->cb_param; if (depend_tx) { enum submit_disposition s; @@ -220,7 +224,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, tx->tx_submit(tx); } - if (flags & ASYNC_TX_ACK) + if (submit->flags & ASYNC_TX_ACK) async_tx_ack(tx); if (depend_tx) @@ -229,21 +233,20 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, EXPORT_SYMBOL_GPL(async_tx_submit); /** - * async_trigger_callback - schedules the callback function to be run after - * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK - * @depend_tx: 'callback' requires the completion of this transaction - * @cb_fn: function to call after depend_tx completes - * @cb_param: parameter to pass to the callback routine + * async_trigger_callback - schedules the callback function to be run + * @submit: submission and completion parameters + * + * honored flags: ASYNC_TX_ACK + * + * The callback is run after any dependent operations have completed. */ struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_trigger_callback(struct async_submit_ctl *submit) { struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; if (depend_tx) { chan = depend_tx->chan; @@ -262,14 +265,14 @@ async_trigger_callback(enum async_tx_flags flags, if (tx) { pr_debug("%s: (async)\n", __func__); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { pr_debug("%s: (sync)\n", __func__); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 3cc5dc763b54..691fa98a18c4 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -34,18 +34,16 @@ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; - dma_async_tx_callback _cb_fn; - void *_cb_param; - enum async_tx_flags async_flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *cb_param_orig = submit->cb_param; + enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; int xor_src_cnt; dma_addr_t dma_dest; @@ -63,7 +61,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } while (src_cnt) { - async_flags = flags; + submit->flags = flags_orig; 
dma_flags = 0; xor_src_cnt = min(src_cnt, dma->max_xor); /* if we are submitting additional xors, leave the chain open, @@ -71,15 +69,15 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, * buffer mapped */ if (src_cnt > xor_src_cnt) { - async_flags &= ~ASYNC_TX_ACK; + submit->flags &= ~ASYNC_TX_ACK; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; - _cb_fn = NULL; - _cb_param = NULL; + submit->cb_fn = NULL; + submit->cb_param = NULL; } else { - _cb_fn = cb_fn; - _cb_param = cb_param; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; } - if (_cb_fn) + if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; /* Since we have clobbered the src_list we are committed @@ -90,7 +88,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, xor_src_cnt, len, dma_flags); if (unlikely(!tx)) - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); /* spin wait for the preceeding transactions to complete */ while (unlikely(!tx)) { @@ -101,10 +99,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags); } - async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, - _cb_param); - - depend_tx = tx; + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -123,8 +119,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, static void do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; int xor_src_cnt; @@ -139,7 +134,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, /* set destination address */ dest_buf = page_address(dest) + offset; - if (flags & ASYNC_TX_XOR_ZERO_DST) + if (submit->flags & ASYNC_TX_XOR_ZERO_DST) memset(dest_buf, 0, len); while (src_cnt > 0) { @@ -152,33 +147,35 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, src_off += xor_src_cnt; } - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } /** * async_xor - attempt to xor a set of blocks with a dma engine. - * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST - * flag must be set to not include dest data in the calculation. The - * assumption with dma eninges is that they only use the destination - * buffer as a source when it is explicity specified in the source list. * @dest: destination page - * @src_list: array of source pages (if the dest is also a source it must be - * at index zero). The contents of this array may be overwritten. - * @offset: offset in pages to start transaction + * @src_list: array of source pages + * @offset: common src/dst offset to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST + * + * xor_blocks always uses the dest as a source so the + * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in + * the calculation. 
The assumption with dma eninges is that they only + * use the destination buffer as a source when it is explicity specified + * in the source list. + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); BUG_ON(src_cnt <= 1); @@ -188,7 +185,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - flags, depend_tx, cb_fn, cb_param); + submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); @@ -196,16 +193,15 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, /* in the sync case the dest is an implied source * (assumes the dest is the first source) */ - if (flags & ASYNC_TX_XOR_DROP_DST) { + if (submit->flags & ASYNC_TX_XOR_DROP_DST) { src_cnt--; src_list++; } /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - do_sync_xor(dest, src_list, offset, src_cnt, len, - flags, cb_fn, cb_param); + do_sync_xor(dest, src_list, offset, src_cnt, len, submit); return NULL; } @@ -222,25 +218,25 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) /** * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously - * @src_list: array of source pages. The dest page must be listed as a source - * at index zero. The contents of this array may be overwritten. + * @src_list: array of source pages * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR_VAL, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? 
chan->device : NULL; @@ -250,11 +246,12 @@ async_xor_val(struct page *dest, struct page **src_list, if (device && src_cnt <= device->max_xor) { dma_addr_t *dma_src = (dma_addr_t *) src_list; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); @@ -263,7 +260,7 @@ async_xor_val(struct page *dest, struct page **src_list, len, result, dma_prep_flags); if (unlikely(!tx)) { - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); while (!tx) { dma_async_issue_pending(chan); @@ -273,23 +270,23 @@ async_xor_val(struct page *dest, struct page **src_list, } } - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { - unsigned long xor_flags = flags; + enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); - xor_flags |= ASYNC_TX_XOR_DROP_DST; - xor_flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_XOR_DROP_DST; + submit->flags &= ~ASYNC_TX_ACK; - tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, - depend_tx, NULL, NULL); + tx = async_xor(dest, src_list, offset, src_cnt, len, submit); async_tx_quiesce(&tx); *result = page_is_zero(dest, offset, len) ? 0 : 1; - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); + submit->flags = flags_orig; } return tx; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0ef5362c8d02..e1920f23579f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -499,11 +499,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, struct page *bio_page; int i; int page_offset; + struct async_submit_ctl submit; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; + + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -525,13 +528,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, 0, - tx, NULL, NULL); + b_offset, clen, &submit); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, 0, - tx, NULL, NULL); + page_offset, clen, &submit); } + /* chain the operations */ + submit.depend_tx = tx; + if (clen < len) /* hit end of page */ break; page_offset += len; @@ -590,6 +594,7 @@ static void ops_run_biofill(struct stripe_head *sh) { struct dma_async_tx_descriptor *tx = NULL; raid5_conf_t *conf = sh->raid_conf; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu\n", __func__, @@ -613,7 +618,8 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); + async_trigger_callback(&submit); } static void ops_complete_compute5(void *stripe_head_ref) @@ -645,6 +651,7 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) struct page *xor_dest = tgt->page; int count = 0; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu block: %d\n", @@ -657,13 +664,12 @@ 
static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute5, sh, NULL); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - 0, NULL, ops_complete_compute5, sh); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute5, sh); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -683,6 +689,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) int disks = sh->disks; struct page *xor_srcs[disks]; int count = 0, pd_idx = sh->pd_idx, i; + struct async_submit_ctl submit; /* existing parity data subtracted */ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -697,9 +704,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) xor_srcs[count++] = dev->page; } - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh, NULL); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -772,7 +779,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) /* kernel stack size limits the total number of disks */ int disks = sh->disks; struct page *xor_srcs[disks]; - + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest; int prexor = 0; @@ -811,13 +818,11 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) atomic_inc(&sh->count); - if (unlikely(count == 1)) { - flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); - } else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); + init_async_submit(&submit, flags, tx, ops_complete_postxor, sh, NULL); + if (unlikely(count == 1)) + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); } static void ops_complete_check(void *stripe_head_ref) @@ -838,6 +843,7 @@ static void ops_run_check(struct stripe_head *sh) int disks = sh->disks; struct page *xor_srcs[disks]; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -851,12 +857,13 @@ static void ops_run_check(struct stripe_head *sh) xor_srcs[count++] = dev->page; } + init_async_submit(&submit, 0, NULL, NULL, NULL, NULL); tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); + &sh->ops.zero_sum_result, &submit); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_ACK, tx, - ops_complete_check, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); + tx = async_trigger_callback(&submit); } static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) @@ -2664,6 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, if (i != sh->pd_idx && i != sh->qd_idx) { int dd_idx, j; struct stripe_head *sh2; + struct async_submit_ctl submit; sector_t bn = 
compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, @@ -2683,9 +2691,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, } /* place all the copies on one channel */ + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, sh->dev[i].page, 0, 0, STRIPE_SIZE, - 0, tx, NULL, NULL); + &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 9f14cd540cd2..00cfb637ddf2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -65,6 +65,22 @@ enum async_tx_flags { ASYNC_TX_ACK = (1 << 2), }; +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; +}; + #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL @@ -73,8 +89,8 @@ enum async_tx_flags { #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -83,9 +99,10 @@ static inline void async_tx_issue_pending_all(void) } static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -97,46 +114,53 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) +{ + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); +} + +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) { - if (cb_fn) - cb_fn(cb_fn_param); + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct 
page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); -struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From e74e396204bfcb67570ba4517b08f5918e69afea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 30 Mar 2009 19:07:44 +0900 Subject: percpu: use dynamic percpu allocator as the default percpu allocator This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use dynamic percpu allocator. The first chunk is allocated using embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as long as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing range limit the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change. * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its invert - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted archs. These archs implement their own setup_per_cpu_areas() and the conversion is not trivial. * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tests on x86_32 with debug code (x86_32 doesn't use default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ] Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: David S. 
Miller Acked-by: Benjamin Herrenschmidt Acked-by: Martin Schwidefsky Reviewed-by: Christoph Lameter Cc: Paul Mundt Cc: Russell King Cc: Mikael Starvik Cc: Ralf Baechle Cc: Bryan Wu Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Hirokazu Takata Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Heiko Carstens Cc: Ingo Molnar --- arch/alpha/Kconfig | 3 +++ arch/ia64/Kconfig | 3 +++ arch/powerpc/Kconfig | 3 +++ arch/s390/Kconfig | 3 +++ arch/sparc/Kconfig | 3 +++ arch/x86/Kconfig | 3 --- include/linux/percpu.h | 12 +++++++++--- init/main.c | 24 ------------------------ kernel/module.c | 6 +++--- mm/Makefile | 2 +- mm/allocpercpu.c | 28 ++++++++++++++++++++++++++++ mm/percpu.c | 40 +++++++++++++++++++++++++++++++++++++++- 12 files changed, 95 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 9fb8aae5c391..05d86407188c 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -70,6 +70,9 @@ config AUTO_IRQ_AFFINITY depends on SMP default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..328d2f8b8c3f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bf6cedfa05db..a774c2acbe69 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -46,6 +46,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool PPC64 + config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a14dba0e4d67..f4a3cc62d28f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -75,6 +75,9 @@ config VIRT_CPU_ACCOUNTING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + mainmenu "Linux Kernel Configuration" config S390 diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3f8b6a92eabd..7a8698b913fe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config AUDIT_ARCH bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y if SPARC64 + config HAVE_SETUP_PER_CPU_AREA def_bool y if SPARC64 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef6b4f9..a48a90076d83 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,9 +149,6 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y -config HAVE_DYNAMIC_PER_CPU_AREA - def_bool y - config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 26fd9d12f050..e5000343dd61 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -34,7 +34,7 @@ #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) @@ -80,7 +80,7 @@ extern ssize_t __init pcpu_embed_first_chunk( extern void *__alloc_reserved_percpu(size_t size, size_t align); -#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ struct percpu_data { void *ptrs[1]; @@ -99,11 +99,15 @@ struct percpu_data { (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ extern void 
*__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +extern void __init setup_per_cpu_areas(void); +#endif + #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) @@ -124,6 +128,8 @@ static inline void free_percpu(void *p) kfree(p); } +static inline void __init setup_per_cpu_areas(void) { } + #endif /* CONFIG_SMP */ #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ diff --git a/init/main.c b/init/main.c index 09131ec090c1..602d724afa5c 100644 --- a/init/main.c +++ b/init/main.c @@ -357,7 +357,6 @@ static void __init smp_init(void) #define smp_init() do { } while (0) #endif -static inline void setup_per_cpu_areas(void) { } static inline void setup_nr_cpu_ids(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } @@ -378,29 +377,6 @@ static void __init setup_nr_cpu_ids(void) nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; } -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; - -EXPORT_SYMBOL(__per_cpu_offset); - -static void __init setup_per_cpu_areas(void) -{ - unsigned long size, i; - char *ptr; - unsigned long nr_possible_cpus = num_possible_cpus(); - - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); - ptr = alloc_bootmem_pages(size * nr_possible_cpus); - - for_each_possible_cpu(i) { - __per_cpu_offset[i] = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - ptr += size; - } -} -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ - /* Called by boot processor to activate the rest. */ static void __init smp_init(void) { diff --git a/kernel/module.c b/kernel/module.c index 38928fcaff2b..f5934954fa99 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA static void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) @@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme) free_percpu(freeme); } -#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ /* Number of blocks used and allocated. */ static unsigned int pcpu_num_used, pcpu_num_allocated; @@ -535,7 +535,7 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, diff --git a/mm/Makefile b/mm/Makefile index 5e0bd6426693..c77c6487552f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA obj-$(CONFIG_SMP) += percpu.o else obj-$(CONFIG_SMP) += allocpercpu.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index dfdee6a47359..df34ceae0c67 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -5,6 +5,8 @@ */ #include #include +#include +#include #ifndef cache_line_size #define cache_line_size() L1_CACHE_BYTES @@ -147,3 +149,29 @@ void free_percpu(void *__pdata) kfree(__percpu_disguise(__pdata)); } EXPORT_SYMBOL_GPL(free_percpu); + +/* + * Generic percpu area setup. 
+ */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; + +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + unsigned long size, i; + char *ptr; + unsigned long nr_possible_cpus = num_possible_cpus(); + + /* Copy section for each CPU (we discard the original) */ + size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); + ptr = alloc_bootmem_pages(size * nr_possible_cpus); + + for_each_possible_cpu(i) { + __per_cpu_offset[i] = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + ptr += size; + } +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ diff --git a/mm/percpu.c b/mm/percpu.c index b70f2acd8853..b14984566f5a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -43,7 +43,7 @@ * * To use this allocator, arch code should do the followings. * - * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA * * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate * regular address to percpu pointer and back if they need to be @@ -1275,3 +1275,41 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, reserved_size, dyn_size, pcpue_unit_size, pcpue_ptr, NULL); } + +/* + * Generic percpu area setup. + * + * The embedding helper is used because its behavior closely resembles + * the original non-dynamic generic percpu area setup. This is + * important because many archs have addressing restrictions and might + * fail if the percpu area is located far away from the previous + * location. As an added bonus, in non-NUMA cases, embedding is + * generally a good idea TLB-wise because percpu area can piggy back + * on the physical linear memory mapping which uses large page + * mappings on applicable archs. + */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + size_t static_size = __per_cpu_end - __per_cpu_start; + ssize_t unit_size; + unsigned long delta; + unsigned int cpu; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, -1); + if (unit_size < 0) + panic("Failed to initialized percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + cpu * unit_size; +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ -- cgit v1.2.3 From 405d967dc70002991f8fc35c20e0d3cbc7614f63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:38 +0900 Subject: linker script: throw away .discard section x86 throws away .discard section but no other archs do. Also, .discard is not thrown away while linking modules. Make every arch and module linking throw it away. This will be used to define dummy variables for percpu declarations and definitions. This patch is based on Ivan Kokshaysky's alpha percpu patch. [ Impact: always throw away everything in .discard ] Signed-off-by: Tejun Heo Cc: Ivan Kokshaysky Cc: Richard Henderson Cc: Russell King Cc: Haavard Skinnemoen Cc: Bryan Wu Cc: Mikael Starvik Cc: Jesper Nilsson Cc: David Howells Cc: Yoshinori Sato Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Ralf Baechle Cc: Kyle McMartin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: David S. 
Miller Cc: Jeff Dike Cc: Chris Zankel Cc: Rusty Russell Cc: Ingo Molnar --- Makefile | 2 +- arch/alpha/kernel/vmlinux.lds.S | 1 + arch/arm/kernel/vmlinux.lds.S | 1 + arch/avr32/kernel/vmlinux.lds.S | 1 + arch/blackfin/kernel/vmlinux.lds.S | 1 + arch/cris/kernel/vmlinux.lds.S | 1 + arch/frv/kernel/vmlinux.lds.S | 2 ++ arch/h8300/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 1 + arch/m32r/kernel/vmlinux.lds.S | 1 + arch/m68k/kernel/vmlinux-std.lds | 1 + arch/m68k/kernel/vmlinux-sun3.lds | 1 + arch/m68knommu/kernel/vmlinux.lds.S | 1 + arch/microblaze/kernel/vmlinux.lds.S | 2 ++ arch/mips/kernel/vmlinux.lds.S | 1 + arch/mn10300/kernel/vmlinux.lds.S | 1 + arch/parisc/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + arch/s390/kernel/vmlinux.lds.S | 1 + arch/sh/kernel/vmlinux.lds.S | 1 + arch/sparc/kernel/vmlinux.lds.S | 1 + arch/um/kernel/dyn.lds.S | 2 ++ arch/um/kernel/uml.lds.S | 2 ++ arch/xtensa/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 8 ++++++++ scripts/module-common.lds | 8 ++++++++ 26 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 scripts/module-common.lds (limited to 'include') diff --git a/Makefile b/Makefile index 46e1c9d03d51..12245be05122 100644 --- a/Makefile +++ b/Makefile @@ -327,7 +327,7 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ MODFLAGS = -DMODULE CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) -LDFLAGS_MODULE = +LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds CFLAGS_KERNEL = AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index b9d6568e5f7f..75fe1d6877e9 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -139,6 +139,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .mdebug 0 : { diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 6c0779792546..e256c57b8981 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -82,6 +82,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) *(.ARM.exidx.exit.text) *(.ARM.extab.exit.text) #ifndef CONFIG_MMU diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 7910d41eb886..b8324608ec0c 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -131,6 +131,7 @@ SECTIONS /DISCARD/ : { EXIT_DATA *(.exitcall.exit) + *(.discard) } DWARF_DEBUG diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 6ac307ca0d80..6e8eabd8f0a6 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -280,5 +280,6 @@ SECTIONS /DISCARD/ : { *(.exitcall.exit) + *(.discard) } } diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 0d2adfc794d4..a3175ebb38cc 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -145,6 +145,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 22d9787406ed..64b5a5e4d35e 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -177,6 +177,8 @@ SECTIONS .debug_ranges 0 : { *(.debug_ranges) } .comment 0 : { *(.comment) } + + /DISCARD/ : { *(.discard) } } __kernel_image_size_no_bss = __bss_start - __kernel_image_start; diff --git 
a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 43a87b9085b6..03d6c0df33db 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -154,6 +154,7 @@ SECTIONS } /DISCARD/ : { *(.exitcall.exit) + *(.discard) } .romfs : { diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 4a95e86b9ac2..13d958975874 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -29,6 +29,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) } diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 4179adf6c624..480a49944cfd 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -125,6 +125,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Stabs debugging sections. */ diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 01d212bb05a6..905a797ada93 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -87,6 +87,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Stabs debugging sections. */ diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index c192f773db96..47d04be322aa 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -82,6 +82,7 @@ __init_begin = .; EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .crap : { diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index b7fe505e358d..68111a61a77f 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -188,6 +188,7 @@ SECTIONS { EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .bss : { diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index d34d38dcd12c..a207543c5927 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -162,4 +162,6 @@ SECTIONS { } . = ALIGN(4096); _end = .; + + /DISCARD/ : { *(.discard) } } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 58738c8d754f..45901609b741 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -179,6 +179,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) /* ABI crap starts here */ *(.MIPS.options) diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 24de6b90f401..5d9f2f96ad92 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -146,6 +146,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index fd2cc4fd2b65..ccf58341845a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -240,6 +240,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8ef8a14abc95..7fca9355fd3d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS /* Sections to be discarded. 
*/ /DISCARD/ : { *(.exitcall.exit) + *(.discard) EXIT_DATA } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a53db23ee092..98867dfea469 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -161,6 +161,7 @@ SECTIONS /DISCARD/ : { EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Debugging sections. */ diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index f53c76acaede..766976d27b21 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -171,6 +171,7 @@ SECTIONS */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index fcbbd000ec08..d63cf914667d 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -175,6 +175,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 9975e1ab44fb..2916d6eadffd 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -156,4 +156,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + /DISCARD/ : { *(.discard) } } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 11b835248b86..1f8a622cabe1 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -100,4 +100,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + /DISCARD/ : { *(.discard) } } diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 41c159cd872f..b1e24638acd7 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -287,6 +287,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .xt.lit : { *(.xt.lit) } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 55413e568f07..a19120c4e109 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -628,6 +628,14 @@ #define INITRAMFS #endif +#define DISCARDS \ + /DISCARD/ : { \ + EXIT_TEXT \ + EXIT_DATA \ + *(.exitcall.exit) \ + *(.discard) \ + } + /** * PERCPU_VADDR - define output section for percpu area * @vaddr: explicit base address (optional) diff --git a/scripts/module-common.lds b/scripts/module-common.lds new file mode 100644 index 000000000000..47a1f9ae0ede --- /dev/null +++ b/scripts/module-common.lds @@ -0,0 +1,8 @@ +/* + * Common module linker script, always used when linking a module. + * Archs are free to supply their own linker scripts. ld will + * combine them automatically. + */ +SECTIONS { + /DISCARD/ : { *(.discard) } +} -- cgit v1.2.3 From 7c756e6e19e71f0327760d8955f7077118ebb2b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:50 +0900 Subject: percpu: implement optional weak percpu definitions Some archs (alpha and s390) need to use weak definitions for percpu variables in modules so that the compiler generates external references for them. This patch implements weak percpu definitions which arch can enable by defining ARCH_NEEDS_WEAK_PER_CPU in arch percpu header file. This weak definition adds the following two restrictions on percpu variable definitions. 1. percpu symbols must be unique whether static or not 2. percpu variables can't be defined inside a function To ensure that these restrictions are observed in generic code, config option DEBUG_FORCE_WEAK_PER_CPU enables weak percpu definitions for all cases. This patch is inspired by Ivan Kokshaysky's alpha percpu patch. 
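As a sketch of what these two restrictions forbid (hypothetical code, not part of the patch; both snippets are accepted with normal percpu definitions but must fail to build once weak definitions are in effect):

/* 1. Duplicate percpu symbol names, even static ones. With __weak,
 *    both definitions would silently collapse into one symbol and the
 *    two files would share storage; the __pcpu_unique_* dummy below
 *    turns this into a build failure instead. */
/* foo.c */ static DEFINE_PER_CPU(int, hit_count);
/* bar.c */ static DEFINE_PER_CPU(int, hit_count);	/* collides */

/* 2. A percpu variable defined inside a function. The file-scope
 *    __pcpu_scope_* dummy emitted by DEFINE_PER_CPU() cannot be
 *    declared at function scope, so this no longer compiles. */
void record_hit(void)
{
	static DEFINE_PER_CPU(int, local_hits);		/* rejected */
}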
[ Impact: stricter rules for percpu variables, one more debug config option ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Howells Cc: Ivan Kokshaysky --- include/linux/percpu-defs.h | 65 ++++++++++++++++++++++++++++++++++++++------- lib/Kconfig.debug | 15 +++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f921d74f49f..cf32838ad0fa 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -10,21 +10,68 @@ /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the - * 'section' argument. This may be used to affect the parameters governing the + * 'sec' argument. This may be used to affect the parameters governing the * variable's storage. * * NOTE! The sections for the DECLARE and for the DEFINE must match, lest * linkage errors occur due the compiler generating the wrong code to access * that section. */ -#define DECLARE_PER_CPU_SECTION(type, name, section) \ - extern \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SECTION(type, name, section) \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define __PCPU_ATTRS(sec) \ + __attribute__((section(PER_CPU_BASE_SECTION sec))) \ + PER_CPU_ATTRIBUTES + +#define __PCPU_DUMMY_ATTRS \ + __attribute__((section(".discard"), unused)) + +/* + * s390 and alpha modules require percpu variables to be defined as + * weak to force the compiler to generate GOT based external + * references for them. This is necessary because percpu sections + * will be located outside of the usually addressable area. + * + * This definition puts the following two extra restrictions when + * defining percpu variables. + * + * 1. The symbol must be globally unique, even the static ones. + * 2. Static percpu variables cannot be defined inside a function. + * + * Archs which need weak percpu definitions should define + * ARCH_NEEDS_WEAK_PER_CPU in asm/percpu.h when necessary. + * + * To ensure that the generic code observes the above two + * restrictions, if CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set weak + * definition is used for all cases. + */ +#if defined(ARCH_NEEDS_WEAK_PER_CPU) || defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU) +/* + * __pcpu_scope_* dummy variable is used to enforce scope. It + * receives the static modifier when it's used in front of + * DEFINE_PER_CPU() and will trigger build failure if + * DECLARE_PER_CPU() is used for the same variable. + * + * __pcpu_unique_* dummy variable is used to enforce symbol uniqueness + * such that hidden weak symbol collision, which will cause unrelated + * variables to share the same address, can be detected during build. + */ +#define DECLARE_PER_CPU_SECTION(type, name, sec) \ + extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + extern __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, sec) \ + __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ + __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name +#else +/* + * Normal declaration and definition macros. 
+ */ +#define DECLARE_PER_CPU_SECTION(type, name, sec) \ + extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, sec) \ + __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name +#endif /* * Variant on the per-CPU variable declaration/definition theme used for diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 23067ab1a73c..77e0d8b1b7c5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -777,6 +777,21 @@ config DEBUG_BLOCK_EXT_DEVT Say N if you are unsure. +config DEBUG_FORCE_WEAK_PER_CPU + bool "Force weak per-cpu definitions" + depends on DEBUG_KERNEL + help + s390 and alpha require percpu variables in modules to be + defined weak to work around addressing range issue which + puts the following two restrictions on percpu variable + definitions. + + 1. percpu symbols must be unique whether static or not + 2. percpu variables can't be defined inside a function + + To ensure that generic code follows the above rules, this + option forces all percpu variables to be defined as weak. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL -- cgit v1.2.3 From dcf52fb71d988ba945054308f661bddf9b2455fb Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 22 Jun 2009 20:41:45 +0000 Subject: ACPI: remove unused acpi_device_ops .stop method No drivers use the .stop method, so remove it. Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/scan.c | 5 ----- include/acpi/acpi_bus.h | 2 -- 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 781435d7e369..4a89f081160f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -426,9 +426,6 @@ static int acpi_device_probe(struct device * dev) if (acpi_drv->ops.notify) { ret = acpi_device_install_notify_handler(acpi_dev); if (ret) { - if (acpi_drv->ops.stop) - acpi_drv->ops.stop(acpi_dev, - acpi_dev->removal_type); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type); @@ -452,8 +449,6 @@ static int acpi_device_remove(struct device * dev) if (acpi_drv) { if (acpi_drv->ops.notify) acpi_device_remove_notify_handler(acpi_dev); - if (acpi_drv->ops.stop) - acpi_drv->ops.stop(acpi_dev, acpi_dev->removal_type); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type); } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..79a6c5ebe908 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -89,7 +89,6 @@ struct acpi_device; typedef int (*acpi_op_add) (struct acpi_device * device); typedef int (*acpi_op_remove) (struct acpi_device * device, int type); typedef int (*acpi_op_start) (struct acpi_device * device); -typedef int (*acpi_op_stop) (struct acpi_device * device, int type); typedef int (*acpi_op_suspend) (struct acpi_device * device, pm_message_t state); typedef int (*acpi_op_resume) (struct acpi_device * device); @@ -106,7 +105,6 @@ struct acpi_device_ops { acpi_op_add add; acpi_op_remove remove; acpi_op_start start; - acpi_op_stop stop; acpi_op_suspend suspend; acpi_op_resume resume; acpi_op_bind bind; -- cgit v1.2.3 From 1a8dd307cc0a2119be4e578c517795464e6dabba Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 29 Jun 2009 17:45:39 +0900 Subject: percpu: use __weak only in the definition of weak percpu variables __weak is necessary only for definition and might even not work in declaration. Drop it from declaration. This change was suggested by Ivan Kokshaysky. 
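As a simplified sketch, with ARCH_NEEDS_WEAK_PER_CPU set the pair now effectively expands as follows (the dummy-variable attributes are omitted and the percpu section name is written out purely for illustration):

/* header: the declaration is a plain extern, no __weak */
extern char __pcpu_scope_cnt;
extern __attribute__((section(".data.percpu"))) __typeof__(int) per_cpu__cnt;

/* one source file: only the definition carries __weak */
char __pcpu_scope_cnt;
char __pcpu_unique_cnt;
__attribute__((section(".data.percpu"))) __weak __typeof__(int) per_cpu__cnt;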
Signed-off-by: Tejun Heo Acked-by: Ivan Kokshaysky --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cf32838ad0fa..9b7a53cc16eb 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -56,7 +56,7 @@ */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ - extern __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ -- cgit v1.2.3 From b294a290d24d1196d68399cc3a9b8c50bfb55abd Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Tue, 30 Jun 2009 02:13:01 -0400 Subject: Revert "power: remove POWER_SUPPLY_PROP_CAPACITY_LEVEL" This reverts commit 8efe444038a205e79b38b7ad03878824901849a8 and 4cbc76eadf56399cd11fb736b33c53aec9caab8c. Richard@laptop.org was apparently using CAPACITY_LEVEL for debugging battery/EC problems, and was upset that it was removed. This readds it. Conflicts: Documentation/power_supply_class.txt Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 2 ++ drivers/power/olpc_battery.c | 9 +++++++++ drivers/power/power_supply_sysfs.c | 6 ++++++ include/linux/power_supply.h | 10 ++++++++++ 4 files changed, 27 insertions(+) (limited to 'include') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index c6cd4956047c..709d95571d7b 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -108,6 +108,8 @@ relative, time-based measurements. ENERGY_FULL, ENERGY_EMPTY - same as above but for energy. CAPACITY - capacity in percents. +CAPACITY_LEVEL - capacity level. This corresponds to +POWER_SUPPLY_CAPACITY_LEVEL_*. TEMP - temperature of the power supply. TEMP_AMBIENT - ambient temperature. 
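For context on how the restored attribute is consumed, a minimal userspace sketch (the supply name "olpc-battery" is an assumption; the sysfs path varies per platform and battery):

/* print the capacity_level string exposed via sysfs, one of the
 * values in capacity_level_text[], e.g. "Low" */
#include <stdio.h>

int main(void)
{
	char level[32];
	FILE *f = fopen("/sys/class/power_supply/olpc-battery/capacity_level", "r");

	if (!f)
		return 1;
	if (fgets(level, sizeof(level), f))
		printf("capacity level: %s", level);
	fclose(f);
	return 0;
}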
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 58e419299cd6..3a589df09376 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -276,6 +276,14 @@ static int olpc_bat_get_property(struct power_supply *psy, return ret; val->intval = ec_byte; break; + case POWER_SUPPLY_PROP_CAPACITY_LEVEL: + if (ec_byte & BAT_STAT_FULL) + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_FULL; + else if (ec_byte & BAT_STAT_LOW) + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_LOW; + else + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; + break; case POWER_SUPPLY_PROP_TEMP: ret = olpc_ec_cmd(EC_BAT_TEMP, NULL, 0, (void *)&ec_word, 2); if (ret) @@ -321,6 +329,7 @@ static enum power_supply_property olpc_bat_props[] = { POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_CURRENT_AVG, POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_MANUFACTURER, diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index da73591017f9..9deabbde6fd6 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -51,6 +51,9 @@ static ssize_t power_supply_show_property(struct device *dev, "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd", "LiMn" }; + static char *capacity_level_text[] = { + "Unknown", "Critical", "Low", "Normal", "High", "Full" + }; ssize_t ret; struct power_supply *psy = dev_get_drvdata(dev); const ptrdiff_t off = attr - power_supply_attrs; @@ -71,6 +74,8 @@ static ssize_t power_supply_show_property(struct device *dev, return sprintf(buf, "%s\n", health_text[value.intval]); else if (off == POWER_SUPPLY_PROP_TECHNOLOGY) return sprintf(buf, "%s\n", technology_text[value.intval]); + else if (off == POWER_SUPPLY_PROP_CAPACITY_LEVEL) + return sprintf(buf, "%s\n", capacity_level_text[value.intval]); else if (off >= POWER_SUPPLY_PROP_MODEL_NAME) return sprintf(buf, "%s\n", value.strval); @@ -109,6 +114,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(energy_now), POWER_SUPPLY_ATTR(energy_avg), POWER_SUPPLY_ATTR(capacity), + POWER_SUPPLY_ATTR(capacity_level), POWER_SUPPLY_ATTR(temp), POWER_SUPPLY_ATTR(temp_ambient), POWER_SUPPLY_ATTR(time_to_empty_now), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 594c494ac3f0..0ab6aa171241 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -58,6 +58,15 @@ enum { POWER_SUPPLY_TECHNOLOGY_LiMn, }; +enum { + POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN = 0, + POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL, + POWER_SUPPLY_CAPACITY_LEVEL_LOW, + POWER_SUPPLY_CAPACITY_LEVEL_NORMAL, + POWER_SUPPLY_CAPACITY_LEVEL_HIGH, + POWER_SUPPLY_CAPACITY_LEVEL_FULL, +}; + enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, @@ -89,6 +98,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_ENERGY_NOW, POWER_SUPPLY_PROP_ENERGY_AVG, POWER_SUPPLY_PROP_CAPACITY, /* in percents! */ + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, -- cgit v1.2.3 From ee8076ed3e1cdd0cd1e61318386932669c90b92f Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Thu, 2 Jul 2009 09:45:18 -0400 Subject: power_supply: Add a charge_type property, and use it for olpc driver This adds a new sysfs file called 'charge_type' which displays the type of charging (unknown, n/a, trickle charge, or fast charging). 
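As a sketch, a hypothetical driver reports the new property from its get_property() callback roughly as follows (the MYCHIP_* status flags are assumptions for illustration; the olpc_battery hunk below is the real example):

/* map controller status bits onto the new charge_type property */
case POWER_SUPPLY_PROP_CHARGE_TYPE:
	if (status & MYCHIP_STAT_PRECHARGE)
		val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
	else if (status & MYCHIP_STAT_CHARGING)
		val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST;
	else
		val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE;
	break;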
This allows things like battery diagnostics to determine what the battery/EC is doing without resorting to changing the 'status' sysfs output. Signed-off-by: Andres Salomon Acked-by: Mark Brown Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 5 +++++ drivers/power/olpc_battery.c | 9 +++++++++ drivers/power/power_supply_sysfs.c | 6 ++++++ include/linux/power_supply.h | 8 ++++++++ 4 files changed, 28 insertions(+) (limited to 'include') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 709d95571d7b..9f16c5178b66 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -76,6 +76,11 @@ STATUS - this attribute represents operating status (charging, full, discharging (i.e. powering a load), etc.). This corresponds to BATTERY_STATUS_* values, as defined in battery.h. +CHARGE_TYPE - batteries can typically charge at different rates. +This defines trickle and fast charges. For batteries that +are already charged or discharging, 'n/a' can be displayed (or +'unknown', if the status is not known). + HEALTH - represents health of the battery, values corresponds to POWER_SUPPLY_HEALTH_*, defined in battery.h. diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 602bbd008f78..8fefe5a73558 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -233,6 +233,14 @@ static int olpc_bat_get_property(struct power_supply *psy, if (ret) return ret; break; + case POWER_SUPPLY_PROP_CHARGE_TYPE: + if (ec_byte & BAT_STAT_TRICKLE) + val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; + else if (ec_byte & BAT_STAT_CHARGING) + val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST; + else + val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE; + break; case POWER_SUPPLY_PROP_PRESENT: val->intval = !!(ec_byte & (BAT_STAT_PRESENT | BAT_STAT_TRICKLE)); @@ -325,6 +333,7 @@ static int olpc_bat_get_property(struct power_supply *psy, static enum power_supply_property olpc_bat_props[] = { POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_TECHNOLOGY, diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 9deabbde6fd6..08144393d64b 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -43,6 +43,9 @@ static ssize_t power_supply_show_property(struct device *dev, static char *status_text[] = { "Unknown", "Charging", "Discharging", "Not charging", "Full" }; + static char *charge_type[] = { + "Unknown", "N/A", "Trickle", "Fast" + }; static char *health_text[] = { "Unknown", "Good", "Overheat", "Dead", "Over voltage", "Unspecified failure", "Cold", @@ -70,6 +73,8 @@ static ssize_t power_supply_show_property(struct device *dev, if (off == POWER_SUPPLY_PROP_STATUS) return sprintf(buf, "%s\n", status_text[value.intval]); + else if (off == POWER_SUPPLY_PROP_CHARGE_TYPE) + return sprintf(buf, "%s\n", charge_type[value.intval]); else if (off == POWER_SUPPLY_PROP_HEALTH) return sprintf(buf, "%s\n", health_text[value.intval]); else if (off == POWER_SUPPLY_PROP_TECHNOLOGY) @@ -86,6 +91,7 @@ static ssize_t power_supply_show_property(struct device *dev, static struct device_attribute power_supply_attrs[] = { /* Properties of type `int' */ POWER_SUPPLY_ATTR(status), + POWER_SUPPLY_ATTR(charge_type), POWER_SUPPLY_ATTR(health), POWER_SUPPLY_ATTR(present), POWER_SUPPLY_ATTR(online), diff --git a/include/linux/power_supply.h 
b/include/linux/power_supply.h index 0ab6aa171241..4c7c6fc35487 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -38,6 +38,13 @@ enum { POWER_SUPPLY_STATUS_FULL, }; +enum { + POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0, + POWER_SUPPLY_CHARGE_TYPE_NONE, + POWER_SUPPLY_CHARGE_TYPE_TRICKLE, + POWER_SUPPLY_CHARGE_TYPE_FAST, +}; + enum { POWER_SUPPLY_HEALTH_UNKNOWN = 0, POWER_SUPPLY_HEALTH_GOOD, @@ -70,6 +77,7 @@ enum { enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, -- cgit v1.2.3 From 788e5abc5441e9046dd91c995c6f1f75bbd144bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:58 +0900 Subject: percpu: drop @unit_size from embed first chunk allocator The only extra feature @unit_size provides is making dead space at the end of the first chunk which doesn't have any valid usecase. Drop the parameter. This will increase consistency with generalized 4k allocator. James Bottomley spotted missing conversion for the default setup_per_cpu_areas() which caused build breakage on all arcsh which use it. [ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: James Bottomley Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 2 +- include/linux/percpu.h | 2 +- mm/percpu.c | 18 ++++++------------ 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef7cf4a..14728206fb52 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -342,7 +342,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) return -EINVAL; return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); + reserve - PERCPU_FIRST_CHUNK_RESERVE); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e5000343dd61..83bff053bd1c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -69,7 +69,7 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size); + ssize_t dyn_size); /* * Use this to get to a cpu's version of the per-cpu object diff --git a/mm/percpu.c b/mm/percpu.c index 19dd83b5cbdc..fc6babe6e554 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1207,7 +1207,6 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected. @@ -1219,9 +1218,9 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * page size. * * When @dyn_size is positive, dynamic area might be larger than - * specified to fill page alignment. Also, when @dyn_size is auto, - * @dyn_size does not fill the whole first chunk but only what's - * necessary for page alignment after static and reserved areas. + * specified to fill page alignment. When @dyn_size is auto, + * @dyn_size is just big enough to fill page alignment after static + * and reserved areas. 
* * If the needed size is smaller than the minimum or specified unit * size, the leftover is returned to the bootmem allocator. @@ -1231,7 +1230,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * percpu access on success, -errno on failure. */ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size) + ssize_t dyn_size) { size_t chunk_size; unsigned int cpu; @@ -1242,12 +1241,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, if (dyn_size != 0) dyn_size = pcpue_size - static_size - reserved_size; - if (unit_size >= 0) { - BUG_ON(unit_size < pcpue_size); - pcpue_unit_size = unit_size; - } else - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); chunk_size = pcpue_unit_size * num_possible_cpus(); pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, @@ -1304,7 +1298,7 @@ void __init setup_per_cpu_areas(void) * what the legacy allocator did. */ unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE, -1); + PERCPU_DYNAMIC_RESERVE); if (unit_size < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From d4b95f80399471e4bce5e992700ff7f06ef91f6a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize 4k first chunk allocator Generalize and move x86 setup_pcpu_4k() into pcpu_4k_first_chunk(). setup_pcpu_4k() now is a simple wrapper around the generalized version. Other than taking size parameters and using arch supplied callbacks to allocate/free memory, pcpu_4k_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, s/pcpu_populate_pte_fn_t/pcpu_fc_populate_pte_fn_t/ for consistency. [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 78 ++++++++++---------------------------- include/linux/percpu.h | 12 +++++- mm/percpu.c | 85 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 113 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 14728206fb52..ab896b31e80b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -123,6 +123,19 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } +/* + * Helpers for first chunk memory allocation + */ +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +{ + return pcpu_alloc_bootmem(cpu, size, size); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + /* * Large page remap allocator * @@ -346,22 +359,11 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k page allocator + * 4k allocator * - * This is the basic allocator. Static percpu area is allocated - * page-by-page and most of initialization is done by the generic - * setup function. + * Boring fallback 4k allocator. This allocator puts more pressure on + * PTE TLBs but other than that behaves nicely on both UMA and NUMA. 
*/ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_nr_static_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_nr_static_pages) - return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; - return NULL; -} - static void __init pcpu4k_populate_pte(unsigned long addr) { populate_extra_pte(addr); @@ -369,51 +371,9 @@ static void __init pcpu4k_populate_pte(unsigned long addr) static ssize_t __init setup_pcpu_4k(size_t static_size) { - size_t pages_size; - unsigned int cpu; - int i, j; - ssize_t ret; - - pcpu4k_nr_static_pages = PFN_UP(static_size); - - /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() - * sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); - - /* allocate and copy */ - j = 0; - for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_nr_static_pages; i++) { - void *ptr; - - ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); - goto enomem; - } - - memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); - pcpu4k_pages[j++] = virt_to_page(ptr); - } - - /* we're ready, commit */ - pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", - pcpu4k_nr_static_pages, static_size); - - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, - -1, NULL, pcpu4k_populate_pte); - goto out_free_ar; - -enomem: - while (--j >= 0) - free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); - return ret; + return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpu4k_populate_pte); } /* for explicit first chunk allocator selection */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 83bff053bd1c..41b5bfab4195 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,18 +59,26 @@ extern void *pcpu_base_addr; typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); -typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); +typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); +typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, ssize_t dyn_size, ssize_t unit_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + pcpu_fc_populate_pte_fn_t populate_pte_fn); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); +extern ssize_t __init pcpu_4k_first_chunk( + size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index fc6babe6e554..27b0f40a3ea8 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1037,7 +1037,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, ssize_t dyn_size, ssize_t unit_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn) + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; static int smap[2], dmap[2]; @@ -1270,6 +1270,89 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, pcpue_unit_size, pcpue_ptr, NULL); } +/* + * 4k page first chunk setup helper. + */ +static struct page **pcpu4k_pages __initdata; +static int pcpu4k_nr_static_pages __initdata; + +static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) +{ + if (pageno < pcpu4k_nr_static_pages) + return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; + return NULL; +} + +/** + * pcpu_4k_first_chunk - map the first chunk using PAGE_SIZE pages + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE + * @free_fn: funtion to free percpu page, always called with PAGE_SIZE + * @populate_pte_fn: function to populate pte + * + * This is a helper to ease setting up embedded first percpu chunk and + * can be called where pcpu_setup_first_chunk() is expected. + * + * This is the basic allocator. Static percpu area is allocated + * page-by-page into vmalloc area. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) +{ + size_t pages_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + pcpu4k_nr_static_pages = PFN_UP(static_size); + + /* unaligned allocations can't be freed, round up to page size */ + pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() * + sizeof(pcpu4k_pages[0])); + pcpu4k_pages = alloc_bootmem(pages_size); + + /* allocate and copy */ + j = 0; + for_each_possible_cpu(cpu) + for (i = 0; i < pcpu4k_nr_static_pages; i++) { + void *ptr; + + ptr = alloc_fn(cpu, PAGE_SIZE); + if (!ptr) { + pr_warning("PERCPU: failed to allocate " + "4k page for cpu%u\n", cpu); + goto enomem; + } + + memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); + pcpu4k_pages[j++] = virt_to_page(ptr); + } + + /* we're ready, commit */ + pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", + pcpu4k_nr_static_pages, static_size); + + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, + reserved_size, -1, + -1, NULL, populate_pte_fn); + goto out_free_ar; + +enomem: + while (--j >= 0) + free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); + ret = -ENOMEM; +out_free_ar: + free_bootmem(__pa(pcpu4k_pages), pages_size); + return ret; +} + /* * Generic percpu area setup. * -- cgit v1.2.3 From 8c4bfc6e8801616ab2e01c38140b2159b388d2ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize lpage first chunk allocator Generalize and move x86 setup_pcpu_lpage() into pcpu_lpage_first_chunk(). setup_pcpu_lpage() now is a simple wrapper around the generalized version. 
Other than taking size parameters and using arch supplied callbacks to allocate/free/map memory, pcpu_lpage_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, factor out pcpu_calc_fc_sizes() which is common to pcpu_embed_first_chunk() and pcpu_lpage_first_chunk(). [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/include/asm/percpu.h | 9 -- arch/x86/kernel/setup_percpu.c | 169 +++------------------------------ arch/x86/mm/pageattr.c | 1 + include/linux/percpu.h | 27 ++++++ mm/percpu.c | 209 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 244 insertions(+), 171 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 103f1ddb0d85..a18c038a3079 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -156,15 +156,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_NEED_MULTIPLE_NODES -void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ab896b31e80b..4f2e0ac9130b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -137,44 +137,21 @@ static void __init pcpu_fc_free(void *ptr, size_t size) } /* - * Large page remap allocator - * - * This allocator uses PMD page as unit. A PMD page is allocated for - * each cpu and each is remapped into vmalloc area using PMD mapping. - * As PMD page is quite large, only part of it is used for the first - * chunk. Unused part is returned to the bootmem allocator. - * - * So, the PMD pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more PMD TLB entry pressure but still is much - * better than only using 4k mappings while still being NUMA friendly. + * Large page remapping allocator */ #ifdef CONFIG_NEED_MULTIPLE_NODES -struct pcpul_ent { - unsigned int cpu; - void *ptr; -}; - -static size_t pcpul_size; -static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; - -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +static void __init pcpul_map(void *ptr, size_t size, void *addr) { - size_t off = (size_t)pageno << PAGE_SHIFT; + pmd_t *pmd, pmd_v; - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); + pmd = populate_extra_pmd((unsigned long)addr); + pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE); + set_pmd(pmd, pmd_v); } static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { - size_t map_size, dyn_size; - unsigned int cpu; - int i, j; - ssize_t ret; + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; @@ -198,134 +175,10 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -EINVAL; } - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. 
- */ - pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - if (pcpul_size > PMD_SIZE) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } - dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - - /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); - pcpul_map = alloc_bootmem(map_size); - - for_each_possible_cpu(cpu) { - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, - PMD_SIZE); - if (!pcpul_map[cpu].ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The 2MB up-rounding bootmem is needed to make - * sure the partial 2MB page is still fully RAM - it's - * not well-specified to have a PAT-incompatible area - * (unmapped RAM, device memory, etc.) in that hole. - */ - free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), - PMD_SIZE - pcpul_size); - - memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); - } - - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * PMD_SIZE; - vm_area_register_early(&pcpul_vm, PMD_SIZE); - - for_each_possible_cpu(cpu) { - pmd_t *pmd, pmd_v; - - pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + - cpu * PMD_SIZE); - pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), - PAGE_KERNEL_LARGE); - set_pmd(pmd, pmd_v); - } - - /* we're ready, commit */ - pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); - - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - return ret; - -enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. This is - * used by pageattr to detect VM aliases and break up the pcpu PMD - * mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used PMD - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); - unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; - int left = 0, right = num_possible_cpus() - 1; - int pos; - - /* pcpul in use at all? 
*/ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < pmd_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > pmd_addr) - right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * PMD_SIZE + offset; - } - } - - return NULL; + return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PMD_SIZE, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1b734d7a8966..c106f7852424 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 41b5bfab4195..9f6bfd7d4b92 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -62,6 +62,7 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); +typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, @@ -79,6 +80,32 @@ extern ssize_t __init pcpu_4k_first_chunk( pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); +#ifdef CONFIG_NEED_MULTIPLE_NODES +extern ssize_t __init pcpu_lpage_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn); + +extern void *pcpu_lpage_remapped(void *kaddr); +#else +static inline ssize_t __init pcpu_lpage_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) +{ + return -EINVAL; +} + +static inline void *pcpu_lpage_remapped(void *kaddr) +{ + return NULL; +} +#endif + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index f3fe7bc7378f..17db527ee2e2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1190,6 +1190,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, return pcpu_unit_size; } +static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep) +{ + size_t size_sum; + + size_sum = PFN_ALIGN(static_size + reserved_size + + (*dyn_sizep >= 0 ? *dyn_sizep : 0)); + if (*dyn_sizep != 0) + *dyn_sizep = size_sum - static_size - reserved_size; + + return size_sum; +} + /* * Embedding first chunk setup helper. */ @@ -1241,10 +1254,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, unsigned int cpu; /* determine parameters and allocate */ - pcpue_size = PFN_ALIGN(static_size + reserved_size + - (dyn_size >= 0 ? 
dyn_size : 0)); - if (dyn_size != 0) - dyn_size = pcpue_size - static_size - reserved_size; + pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); chunk_size = pcpue_unit_size * num_possible_cpus(); @@ -1390,6 +1400,197 @@ out_free_ar: return ret; } +/* + * Large page remapping first chunk setup helper + */ +#ifdef CONFIG_NEED_MULTIPLE_NODES +struct pcpul_ent { + unsigned int cpu; + void *ptr; +}; + +static size_t pcpul_size; +static size_t pcpul_unit_size; +static struct pcpul_ent *pcpul_map; +static struct vm_struct pcpul_vm; + +static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +{ + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpul_size) + return NULL; + + return virt_to_page(pcpul_map[cpu].ptr + off); +} + +/** + * pcpu_lpage_first_chunk - remap the first percpu chunk using large page + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @lpage_size: the size of a large page + * @alloc_fn: function to allocate percpu lpage, always called with lpage_size + * @free_fn: function to free percpu memory, @size <= lpage_size + * @map_fn: function to map percpu lpage, always called with lpage_size + * + * This allocator uses large page as unit. A large page is allocated + * for each cpu and each is remapped into vmalloc area using large + * page mapping. As large page can be quite large, only part of it is + * used for the first chunk. Unused part is returned to the bootmem + * allocator. + * + * So, the large pages are mapped twice - once to the physical mapping + * and to the vmalloc area for the first percpu chunk. The double + * mapping does add one more large TLB entry pressure but still is + * much better than only using 4k mappings while still being NUMA + * friendly. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) +{ + size_t size_sum; + size_t map_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + /* + * Currently supports only single page. Supporting multiple + * pages won't be too difficult if it ever becomes necessary. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + + pcpul_unit_size = lpage_size; + pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + if (pcpul_size > pcpul_unit_size) { + pr_warning("PERCPU: static data is larger than large page, " + "can't use large page\n"); + return -EINVAL; + } + + /* allocate pointer array and alloc large pages */ + map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + pcpul_map = alloc_bootmem(map_size); + + for_each_possible_cpu(cpu) { + void *ptr; + + ptr = alloc_fn(cpu, lpage_size); + if (!ptr) { + pr_warning("PERCPU: failed to allocate large page " + "for cpu%u\n", cpu); + goto enomem; + } + + /* + * Only use pcpul_size bytes and give back the rest. + * + * Ingo: The lpage_size up-rounding bootmem is needed + * to make sure the partial lpage is still fully RAM - + * it's not well-specified to have a incompatible area + * (unmapped RAM, device memory, etc.) in that hole. 
+ */ + free_fn(ptr + pcpul_size, lpage_size - pcpul_size); + + pcpul_map[cpu].cpu = cpu; + pcpul_map[cpu].ptr = ptr; + + memcpy(ptr, __per_cpu_load, static_size); + } + + /* allocate address and map */ + pcpul_vm.flags = VM_ALLOC; + pcpul_vm.size = num_possible_cpus() * pcpul_unit_size; + vm_area_register_early(&pcpul_vm, pcpul_unit_size); + + for_each_possible_cpu(cpu) + map_fn(pcpul_map[cpu].ptr, pcpul_unit_size, + pcpul_vm.addr + cpu * pcpul_unit_size); + + /* we're ready, commit */ + pr_info("PERCPU: Remapped at %p with large pages, static data " + "%zu bytes\n", pcpul_vm.addr, static_size); + + ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, + reserved_size, dyn_size, pcpul_unit_size, + pcpul_vm.addr, NULL); + + /* sort pcpul_map array for pcpu_lpage_remapped() */ + for (i = 0; i < num_possible_cpus() - 1; i++) + for (j = i + 1; j < num_possible_cpus(); j++) + if (pcpul_map[i].ptr > pcpul_map[j].ptr) { + struct pcpul_ent tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + return ret; + +enomem: + for_each_possible_cpu(cpu) + if (pcpul_map[cpu].ptr) + free_fn(pcpul_map[cpu].ptr, pcpul_size); + free_bootmem(__pa(pcpul_map), map_size); + return -ENOMEM; +} + +/** + * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area + * @kaddr: the kernel address in question + * + * Determine whether @kaddr falls in the pcpul recycled area. This is + * used by pageattr to detect VM aliases and break up the pcpu large + * page mapping such that the same physical page is not mapped under + * different attributes. + * + * The recycled area is always at the tail of a partially used large + * page. + * + * RETURNS: + * Address of corresponding remapped pcpu address if match is found; + * otherwise, NULL. + */ +void *pcpu_lpage_remapped(void *kaddr) +{ + unsigned long unit_mask = pcpul_unit_size - 1; + void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask); + unsigned long offset = (unsigned long)kaddr & unit_mask; + int left = 0, right = num_possible_cpus() - 1; + int pos; + + /* pcpul in use at all? */ + if (!pcpul_map) + return NULL; + + /* okay, perform binary search */ + while (left <= right) { + pos = (left + right) / 2; + + if (pcpul_map[pos].ptr < lpage_addr) + left = pos + 1; + else if (pcpul_map[pos].ptr > lpage_addr) + right = pos - 1; + else { + /* it shouldn't be in the area for the first chunk */ + WARN_ON(offset < pcpul_size); + + return pcpul_vm.addr + + pcpul_map[pos].cpu * pcpul_unit_size + offset; + } + } + + return NULL; +} +#endif + /* * Generic percpu area setup. * -- cgit v1.2.3 From 38a6be525460f52ac6f2de1c3f73c5615a8853cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: percpu: simplify pcpu_setup_first_chunk() Now that all first chunk allocator helpers allocate and map the first chunk themselves, there's no need to have optional default alloc/map in pcpu_setup_first_chunk(). Drop @populate_pte_fn and only leave @dyn_size optional and make all other params mandatory. This makes it much easier to follow what pcpu_setup_first_chunk() is doing and what actual differences tweaking each parameter results in. 
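Schematically, a caller now looks like this (a sketch with illustrative names and values; the sparc conversion in the diff below is a real instance):

/* the caller has already allocated the first chunk, mapped it at
 * base_addr and copied the static data before handing it over */
pcpu_unit_size = pcpu_setup_first_chunk(my_get_page,	/* page lookup */
					static_size,
					PERCPU_MODULE_RESERVE,
					-1,		/* auto dyn_size */
					MY_UNIT_SIZE,	/* multiple of PAGE_SIZE */
					base_addr);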
[ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/sparc/kernel/smp_64.c | 2 +- include/linux/percpu.h | 5 +-- mm/percpu.c | 104 +++++++++++++-------------------------------- 3 files changed, 33 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fa44eaf8d897..ccad7b20ae75 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1528,7 +1528,7 @@ void __init setup_per_cpu_areas(void) pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); + PCPU_CHUNK_SIZE, vm.addr); free_bootmem(__pa(pcpur_ptrs), ptrs_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f6bfd7d4b92..ec64357e1762 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -66,9 +66,8 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size, - void *base_addr, - pcpu_fc_populate_pte_fn_t populate_pte_fn); + ssize_t dyn_size, size_t unit_size, + void *base_addr); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, diff --git a/mm/percpu.c b/mm/percpu.c index 17db527ee2e2..21d938a10662 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -983,24 +983,22 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes - * @reserved_size: the size of reserved percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes, 0 for none * @dyn_size: free size for dynamic allocation in bytes, -1 for auto - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto - * @base_addr: mapped address, NULL for auto - * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE + * @base_addr: mapped address * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area - * setup path. The first two parameters are mandatory. The rest are - * optional. + * setup path. * * @get_page_fn() should return pointer to percpu page given cpu * number and page number. It should at least return enough pages to * cover the static area. The returned pages for static area should - * have been initialized with valid data. If @unit_size is specified, - * it can also return pages after the static area. NULL return - * indicates end of pages for the cpu. Note that @get_page_fn() must - * return the same number of pages for all cpus. + * have been initialized with valid data. It can also return pages + * after the static area. NULL return indicates end of pages for the + * cpu. Note that @get_page_fn() must return the same number of pages + * for all cpus. * * @reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves @@ -1015,17 +1013,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * non-negative value makes percpu leave alone the area beyond * @static_size + @reserved_size + @dyn_size. 
* - * @unit_size, if non-negative, specifies unit size and must be - * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @reserved_size + if non-negative, @dyn_size. - * - * Non-null @base_addr means that the caller already allocated virtual - * region for the first chunk and mapped it. percpu must not mess - * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL - * @populate_pte_fn doesn't make any sense. + * @unit_size specifies unit size and must be aligned to PAGE_SIZE and + * equal to or larger than @static_size + @reserved_size + if + * non-negative, @dyn_size. * - * @populate_pte_fn is used to populate the pagetable. NULL means the - * caller already populated the pagetable. + * The caller should have mapped the first chunk at @base_addr and + * copied static data to each unit. * * If the first chunk ends up with both reserved and dynamic areas, it * is served by two chunks - one to serve the core static and reserved @@ -1040,9 +1033,8 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size, - void *base_addr, - pcpu_fc_populate_pte_fn_t populate_pte_fn) + ssize_t dyn_size, size_t unit_size, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; @@ -1050,27 +1042,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, (dyn_size >= 0 ? dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; - int nr_pages; - int err, i; + int i, nr_pages; /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); - if (unit_size >= 0) { - BUG_ON(unit_size < size_sum); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); - } else - BUG_ON(base_addr); - BUG_ON(base_addr && populate_pte_fn); - - if (unit_size >= 0) - pcpu_unit_pages = unit_size >> PAGE_SHIFT; - else - pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(size_sum)); + BUG_ON(!base_addr); + BUG_ON(unit_size < size_sum); + BUG_ON(unit_size & ~PAGE_MASK); + BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) @@ -1079,6 +1062,10 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; + first_vm.flags = VM_ALLOC; + first_vm.size = pcpu_chunk_size; + first_vm.addr = base_addr; + /* * Allocate chunk slots. The additional last slot is for * empty chunks. 
@@ -1101,6 +1088,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); schunk->page = schunk->page_ar; + schunk->immutable = true; if (reserved_size) { schunk->free_size = reserved_size; @@ -1124,31 +1112,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); dchunk->page = schunk->page_ar; /* share page map with schunk */ + dchunk->immutable = true; dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; dchunk->map[dchunk->map_used++] = dchunk->free_size; } - /* allocate vm address */ - first_vm.flags = VM_ALLOC; - first_vm.size = pcpu_chunk_size; - - if (!base_addr) - vm_area_register_early(&first_vm, PAGE_SIZE); - else { - /* - * Pages already mapped. No need to remap into - * vmalloc area. In this case the first chunks can't - * be mapped or unmapped by percpu and are marked - * immutable. - */ - first_vm.addr = base_addr; - schunk->immutable = true; - if (dchunk) - dchunk->immutable = true; - } - /* assign pages */ nr_pages = -1; for_each_possible_cpu(cpu) { @@ -1168,19 +1138,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, BUG_ON(nr_pages != i); } - /* map them */ - if (populate_pte_fn) { - for_each_possible_cpu(cpu) - for (i = 0; i < nr_pages; i++) - populate_pte_fn(pcpu_chunk_addr(schunk, - cpu, i)); - - err = pcpu_map(schunk, 0, nr_pages); - if (err) - panic("failed to setup static percpu area, err=%d\n", - err); - } - /* link the first chunk in */ pcpu_first_chunk = dchunk ?: schunk; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -1282,7 +1239,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, return pcpu_setup_first_chunk(pcpue_get_page, static_size, reserved_size, dyn_size, - pcpue_unit_size, pcpue_ptr, NULL); + pcpue_unit_size, pcpue_ptr); } /* @@ -1387,8 +1344,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, reserved_size, -1, - pcpu4k_unit_pages << PAGE_SHIFT, vm.addr, - NULL); + pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); goto out_free_ar; enomem: @@ -1521,7 +1477,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, reserved_size, dyn_size, pcpul_unit_size, - pcpul_vm.addr, NULL); + pcpul_vm.addr); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From ce3141a277ff6cc37e51008b8888dc2cb7456ef1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: drop pcpu_chunk->page[] percpu core doesn't need to track all the allocated pages. It needs to know whether certain pages are populated and a way to reverse map address to page when freeing. This patch drops pcpu_chunk->page[] and uses a populated bitmap and vmalloc_to_page() lookup instead. Using vmalloc_to_page() exclusively is also possible but complicates first chunk handling, inflates cache footprint and prevents non-standard memory allocation for percpu memory. pcpu_chunk->page[] was used to track each page's allocation and allowed asymmetric population which happens during the failure path; however, with a single bitmap for all units, this is no longer possible.
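For illustration, the new bitmap is consumed through region iterators (pcpu_for_each_pop_region() and friends in the diff below) which walk maximal populated/unpopulated runs. A minimal userspace sketch of that scan, with a trivial linear search standing in for the kernel's find_next_bit()/find_next_zero_bit() (toy code, not part of the patch):

	#include <stdio.h>

	#define UNIT_PAGES 8

	/* toy stand-in for find_next_bit()/find_next_zero_bit() */
	static int next_with_val(const unsigned char *bm, int size, int off, int val)
	{
		while (off < size && bm[off] != val)
			off++;
		return off;
	}

	int main(void)
	{
		/* 1 = populated, 0 = unpopulated; models chunk->populated[] */
		unsigned char populated[UNIT_PAGES] = { 1, 1, 0, 0, 1, 0, 1, 1 };
		int rs, re;

		/* report maximal populated regions, one per iteration */
		for (rs = 0; rs < UNIT_PAGES; rs = re + 1) {
			rs = next_with_val(populated, UNIT_PAGES, rs, 1);
			re = next_with_val(populated, UNIT_PAGES, rs + 1, 0);
			if (rs < UNIT_PAGES)
				printf("populated region [%d,%d)\n", rs, re);
		}
		return 0;
	}

This prints [0,2), [4,5) and [6,8) for the example bitmap; (de)population then only has to operate on whole regions rather than on individual page pointers.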
Bite the bullet and rewrite (de)populate functions so that things are done in clearly separated steps such that asymmetric population doesn't happen. This makes the (de)population process much more modular and will also ease implementing non-standard memory usage in the future (e.g. large pages). This makes @get_page_fn parameter to pcpu_setup_first_chunk() unnecessary. The parameter is dropped and all first chunk helpers are updated accordingly. Please note that despite the volume most changes to first chunk helpers are symbol renames for variables which don't need to be referenced outside of the helper anymore. This change reduces memory usage and cache footprint of pcpu_chunk. Now only #unit_pages bits are necessary per chunk. [ Impact: reduced memory usage and cache footprint for bookkeeping ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 42 ++-- include/linux/percpu.h | 3 +- mm/percpu.c | 604 ++++++++++++++++++++++++++++----------------- 3 files changed, 400 insertions(+), 249 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index ccad7b20ae75..f2f22ee97a7a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1415,19 +1415,6 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } -static size_t pcpur_size __initdata; -static void **pcpur_ptrs __initdata; - -static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpur_size) - return NULL; - - return virt_to_page(pcpur_ptrs[cpu] + off); -} - #define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) static void __init pcpu_map_range(unsigned long start, unsigned long end, @@ -1491,25 +1478,26 @@ void __init setup_per_cpu_areas(void) size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; static struct vm_struct vm; unsigned long delta, cpu; - size_t pcpu_unit_size; + size_t size_sum, pcpu_unit_size; size_t ptrs_size; + void **ptrs; - pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; + size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); + dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; - ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); - pcpur_ptrs = alloc_bootmem(ptrs_size); + ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(ptrs[0])); + ptrs = alloc_bootmem(ptrs_size); for_each_possible_cpu(cpu) { - pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, - PCPU_CHUNK_SIZE); + ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, + PCPU_CHUNK_SIZE); - free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), - PCPU_CHUNK_SIZE - pcpur_size); + free_bootmem(__pa(ptrs[cpu] + size_sum), + PCPU_CHUNK_SIZE - size_sum); - memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); + memcpy(ptrs[cpu], __per_cpu_load, static_size); } /* allocate address and map */ @@ -1523,14 +1511,14 @@ void __init setup_per_cpu_areas(void) start += cpu * PCPU_CHUNK_SIZE; end = start + PCPU_CHUNK_SIZE; - pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu])); + pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, + pcpu_unit_size = pcpu_setup_first_chunk(static_size, PERCPU_MODULE_RESERVE, dyn_size, PCPU_CHUNK_SIZE, vm.addr); - free_bootmem(__pa(pcpur_ptrs), ptrs_size); + free_bootmem(__pa(ptrs), 
ptrs_size); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { diff --git a/include/linux/percpu.h b/include/linux/percpu.h index ec64357e1762..63c8b7a23e66 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -58,13 +58,12 @@ extern void *pcpu_base_addr; -typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); -extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, +extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, void *base_addr); diff --git a/mm/percpu.c b/mm/percpu.c index 639fce4d2caf..21756814d99f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -94,8 +94,7 @@ struct pcpu_chunk { int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ bool immutable; /* no [de]population allowed */ - struct page **page; /* points to page array */ - struct page *page_ar[]; /* #cpus * UNIT_PAGES */ + unsigned long populated[]; /* populated bitmap */ }; static int pcpu_unit_pages __read_mostly; @@ -129,9 +128,9 @@ static int pcpu_reserved_chunk_limit; * Synchronization rules. * * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former - * protects allocation/reclaim paths, chunks and chunk->page arrays. - * The latter is a spinlock and protects the index data structures - - * chunk slots, chunks and area maps in chunks. + * protects allocation/reclaim paths, chunks, populated bitmap and + * vmalloc mapping. The latter is a spinlock and protects the index + * data structures - chunk slots, chunks and area maps in chunks. * * During allocation, pcpu_alloc_mutex is kept locked all the time and * pcpu_lock is grabbed and released as necessary. All actual memory @@ -188,16 +187,13 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); } -static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, - unsigned int cpu, int page_idx) +static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) { - return &chunk->page[pcpu_page_idx(cpu, page_idx)]; -} + /* must not be used on pre-mapped chunk */ + WARN_ON(chunk->immutable); -static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, - int page_idx) -{ - return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; + return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); } /* set the pointer to a chunk in a page struct */ @@ -212,6 +208,34 @@ static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) return (struct pcpu_chunk *)page->index; } +static void pcpu_next_unpop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +{ + *rs = find_next_zero_bit(chunk->populated, end, *rs); + *re = find_next_bit(chunk->populated, end, *rs + 1); +} + +static void pcpu_next_pop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +{ + *rs = find_next_bit(chunk->populated, end, *rs); + *re = find_next_zero_bit(chunk->populated, end, *rs + 1); +} + +/* + * (Un)populated page region iterators. Iterate over (un)populated + * page regions betwen @start and @end in @chunk. @rs and @re should + * be integer variables and will be set to start and end page index of + * the current region. 
+ */ +#define pcpu_for_each_unpop_region(chunk, rs, re, start, end) \ + for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end))) + +#define pcpu_for_each_pop_region(chunk, rs, re, start, end) \ + for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) + /** * pcpu_mem_alloc - allocate memory * @size: bytes to allocate @@ -545,42 +569,197 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) } /** - * pcpu_unmap - unmap pages out of a pcpu_chunk + * pcpu_get_pages_and_bitmap - get temp pages array and bitmap + * @chunk: chunk of interest + * @bitmapp: output parameter for bitmap + * @may_alloc: may allocate the array + * + * Returns pointer to array of pointers to struct page and bitmap, + * both of which can be indexed with pcpu_page_idx(). The returned + * array is cleared to zero and *@bitmapp is copied from + * @chunk->populated. Note that there is only one array and bitmap + * and access exclusion is the caller's responsibility. + * + * CONTEXT: + * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. + * Otherwise, don't care. + * + * RETURNS: + * Pointer to temp pages array on success, NULL on failure. + */ +static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, + unsigned long **bitmapp, + bool may_alloc) +{ + static struct page **pages; + static unsigned long *bitmap; + size_t pages_size = num_possible_cpus() * pcpu_unit_pages * + sizeof(pages[0]); + size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * + sizeof(unsigned long); + + if (!pages || !bitmap) { + if (may_alloc && !pages) + pages = pcpu_mem_alloc(pages_size); + if (may_alloc && !bitmap) + bitmap = pcpu_mem_alloc(bitmap_size); + if (!pages || !bitmap) + return NULL; + } + + memset(pages, 0, pages_size); + bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); + + *bitmapp = bitmap; + return pages; +} + +/** + * pcpu_free_pages - free pages which were allocated for @chunk + * @chunk: chunk pages were allocated for + * @pages: array of pages to be freed, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be freed + * @page_end: page index of the last page to be freed + 1 + * + * Free pages [@page_start and @page_end) in @pages for all units. + * The pages were allocated for @chunk. + */ +static void pcpu_free_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page = pages[pcpu_page_idx(cpu, i)]; + + if (page) + __free_page(page); + } + } +} + +/** + * pcpu_alloc_pages - allocates pages for @chunk + * @chunk: target chunk + * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be allocated + * @page_end: page index of the last page to be allocated + 1 + * + * Allocate pages [@page_start,@page_end) into @pages for all units. + * The allocation is for @chunk. Percpu core doesn't care about the + * content of @pages and will pass it verbatim to pcpu_map_pages(). 
+ */ +static int pcpu_alloc_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; + + *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); + if (!*pagep) { + pcpu_free_pages(chunk, pages, populated, + page_start, page_end); + return -ENOMEM; + } + } + } + return 0; +} + +/** + * pcpu_pre_unmap_flush - flush cache prior to unmapping + * @chunk: chunk the regions to be flushed belongs to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages in [@page_start,@page_end) of @chunk are about to be + * unmapped. Flush cache. As each flushing trial can be very + * expensive, issue flush on the whole region at once rather than + * doing it for each cpu. This could be an overkill but is more + * scalable. + */ +static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + + flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); +} + +static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) +{ + unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); +} + +/** + * pcpu_unmap_pages - unmap pages out of a pcpu_chunk * @chunk: chunk of interest + * @pages: pages array which can be used to pass information to free + * @populated: populated bitmap * @page_start: page index of the first page to unmap * @page_end: page index of the last page to unmap + 1 - * @flush_tlb: whether to flush tlb or not * * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. - * If @flush is true, vcache is flushed before unmapping and tlb - * after. + * Corresponding elements in @pages were cleared by the caller and can + * be used to carry information to pcpu_free_pages() which will be + * called after all unmaps are finished. The caller should call + * proper pre/post flush functions. */ -static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, - bool flush_tlb) +static void pcpu_unmap_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; unsigned int cpu; + int i; - /* unmap must not be done on immutable chunk */ - WARN_ON(chunk->immutable); + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page; - /* - * Each flushing trial can be very expensive, issue flush on - * the whole region at once rather than doing it for each cpu. - * This could be an overkill but is more scalable. 
- */ - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + page = pcpu_chunk_page(chunk, cpu, i); + WARN_ON(!page); + pages[pcpu_page_idx(cpu, i)] = page; + } + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), + page_end - page_start); + } - for_each_possible_cpu(cpu) - unmap_kernel_range_noflush( - pcpu_chunk_addr(chunk, cpu, page_start), - (page_end - page_start) << PAGE_SHIFT); - - /* ditto as flush_cache_vunmap() */ - if (flush_tlb) - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + for (i = page_start; i < page_end; i++) + __clear_bit(i, populated); +} + +/** + * pcpu_post_unmap_tlb_flush - flush TLB after unmapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush + * TLB for the regions. This can be skipped if the area is to be + * returned to vmalloc as vmalloc will handle TLB flushing lazily. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + + flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -591,35 +770,76 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages, } /** - * pcpu_map - map pages into a pcpu_chunk + * pcpu_map_pages - map pages into a pcpu_chunk * @chunk: chunk of interest + * @pages: pages array containing pages to be mapped + * @populated: populated bitmap * @page_start: page index of the first page to map * @page_end: page index of the last page to map + 1 * - * For each cpu, map pages [@page_start,@page_end) into @chunk. - * vcache is flushed afterwards. + * For each cpu, map pages [@page_start,@page_end) into @chunk. The + * caller is responsible for calling pcpu_post_map_flush() after all + * mappings are complete. + * + * This function is responsible for setting corresponding bits in + * @chunk->populated bitmap and whatever is necessary for reverse + * lookup (addr -> chunk). 
*/ -static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) +static int pcpu_map_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - unsigned int cpu; - int err; - - /* map must not be done on immutable chunk */ - WARN_ON(chunk->immutable); + unsigned int cpu, tcpu; + int i, err; for_each_possible_cpu(cpu) { err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), - pcpu_chunk_pagep(chunk, cpu, page_start), + &pages[pcpu_page_idx(cpu, page_start)], page_end - page_start); if (err < 0) - return err; + goto err; } + /* mapping successful, link chunk and mark populated */ + for (i = page_start; i < page_end; i++) { + for_each_possible_cpu(cpu) + pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], + chunk); + __set_bit(i, populated); + } + + return 0; + +err: + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), + page_end - page_start); + } + return err; +} + +/** + * pcpu_post_map_flush - flush cache after mapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been mapped. Flush + * cache. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_map_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + /* flush at once, please read comments in pcpu_unmap() */ flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), pcpu_chunk_addr(chunk, last, page_end)); - return 0; } /** @@ -636,39 +856,45 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) * CONTEXT: * pcpu_alloc_mutex. */ -static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, - bool flush) +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) { int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); - int unmap_start = -1; - int uninitialized_var(unmap_end); - unsigned int cpu; - int i; + struct page **pages; + unsigned long *populated; + int rs, re; + + /* quick path, check whether it's empty already */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + if (rs == page_start && re == page_end) + return; + break; + } - for (i = page_start; i < page_end; i++) { - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + /* immutable chunks can't be depopulated */ + WARN_ON(chunk->immutable); - if (!*pagep) - continue; + /* + * If control reaches here, there must have been at least one + * successful population attempt so the temp pages array must + * be available now. + */ + pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); + BUG_ON(!pages); - __free_page(*pagep); + /* unmap and free */ + pcpu_pre_unmap_flush(chunk, page_start, page_end); - /* - * If it's partial depopulation, it might get - * populated or depopulated again. Mark the - * page gone. - */ - *pagep = NULL; + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); - unmap_start = unmap_start < 0 ? 
i : unmap_start; - unmap_end = i + 1; - } - } + /* no need to flush tlb, vmalloc will handle it lazily */ + + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_free_pages(chunk, pages, populated, rs, re); - if (unmap_start >= 0) - pcpu_unmap(chunk, unmap_start, unmap_end, flush); + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); } /** @@ -685,50 +911,61 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) { - const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); - int map_start = -1; - int uninitialized_var(map_end); + int free_end = page_start, unmap_end = page_start; + struct page **pages; + unsigned long *populated; unsigned int cpu; - int i; + int rs, re, rc; - for (i = page_start; i < page_end; i++) { - if (pcpu_chunk_page_occupied(chunk, i)) { - if (map_start >= 0) { - if (pcpu_map(chunk, map_start, map_end)) - goto err; - map_start = -1; - } - continue; - } + /* quick path, check whether all pages are already there */ + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { + if (rs == page_start && re == page_end) + goto clear; + break; + } - map_start = map_start < 0 ? i : map_start; - map_end = i + 1; + /* need to allocate and map pages, this chunk can't be immutable */ + WARN_ON(chunk->immutable); - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); + if (!pages) + return -ENOMEM; - *pagep = alloc_pages_node(cpu_to_node(cpu), - alloc_mask, 0); - if (!*pagep) - goto err; - pcpu_set_page_chunk(*pagep, chunk); - } + /* alloc and map */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_free; + free_end = re; } - if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) - goto err; + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_map_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_unmap; + unmap_end = re; + } + pcpu_post_map_flush(chunk, page_start, page_end); + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); +clear: for_each_possible_cpu(cpu) memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, size); - return 0; -err: - /* likely under heavy memory pressure, give memory back */ - pcpu_depopulate_chunk(chunk, off, size, true); - return -ENOMEM; + +err_unmap: + pcpu_pre_unmap_flush(chunk, page_start, unmap_end); + pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); + pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); +err_free: + pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) + pcpu_free_pages(chunk, pages, populated, rs, re); + return rc; } static void free_pcpu_chunk(struct pcpu_chunk *chunk) @@ -752,7 +989,6 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); chunk->map_alloc = PCPU_DFL_MAP_ALLOC; chunk->map[chunk->map_used++] = pcpu_unit_size; - chunk->page = chunk->page_ar; chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); if (!chunk->vm) { @@ -933,7 +1169,7 @@ static void pcpu_reclaim(struct work_struct *work) mutex_unlock(&pcpu_alloc_mutex); list_for_each_entry_safe(chunk, next, &todo, list) { 
- pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size); free_pcpu_chunk(chunk); } } @@ -981,7 +1217,6 @@ EXPORT_SYMBOL_GPL(free_percpu); /** * pcpu_setup_first_chunk - initialize the first percpu chunk - * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes, 0 for none * @dyn_size: free size for dynamic allocation in bytes, -1 for auto @@ -992,14 +1227,6 @@ EXPORT_SYMBOL_GPL(free_percpu); * perpcu area. This function is to be called from arch percpu area * setup path. * - * @get_page_fn() should return pointer to percpu page given cpu - * number and page number. It should at least return enough pages to - * cover the static area. The returned pages for static area should - * have been initialized with valid data. It can also return pages - * after the static area. NULL return indicates end of pages for the - * cpu. Note that @get_page_fn() must return the same number of pages - * for all cpus. - * * @reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved @@ -1031,8 +1258,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * The determined pcpu_unit_size which can be used to initialize * percpu access. */ -size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t reserved_size, +size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, void *base_addr) { @@ -1041,8 +1267,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t size_sum = static_size + reserved_size + (dyn_size >= 0 ? 
dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; - unsigned int cpu; - int i, nr_pages; + int i; /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || @@ -1056,8 +1281,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; - pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) - + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); + pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; @@ -1087,8 +1312,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->vm = &first_vm; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); - schunk->page = schunk->page_ar; schunk->immutable = true; + bitmap_fill(schunk->populated, pcpu_unit_pages); if (reserved_size) { schunk->free_size = reserved_size; @@ -1106,38 +1331,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, /* init dynamic chunk if necessary */ if (dyn_size) { - dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); + dchunk = alloc_bootmem(pcpu_chunk_struct_size); INIT_LIST_HEAD(&dchunk->list); dchunk->vm = &first_vm; dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); - dchunk->page = schunk->page_ar; /* share page map with schunk */ dchunk->immutable = true; + bitmap_fill(dchunk->populated, pcpu_unit_pages); dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; dchunk->map[dchunk->map_used++] = dchunk->free_size; } - /* assign pages */ - nr_pages = -1; - for_each_possible_cpu(cpu) { - for (i = 0; i < pcpu_unit_pages; i++) { - struct page *page = get_page_fn(cpu, i); - - if (!page) - break; - *pcpu_chunk_pagep(schunk, cpu, i) = page; - } - - BUG_ON(i < PFN_UP(static_size)); - - if (nr_pages < 0) - nr_pages = i; - else - BUG_ON(nr_pages != i); - } - /* link the first chunk in */ pcpu_first_chunk = dchunk ?: schunk; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -1160,23 +1366,6 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, return size_sum; } -/* - * Embedding first chunk setup helper. 
- */ -static void *pcpue_ptr __initdata; -static size_t pcpue_size __initdata; -static size_t pcpue_unit_size __initdata; - -static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpue_size) - return NULL; - - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); -} - /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @static_size: the size of static percpu area in bytes @@ -1207,18 +1396,19 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size) { - size_t chunk_size; + size_t size_sum, unit_size, chunk_size; + void *base; unsigned int cpu; /* determine parameters and allocate */ - pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - chunk_size = pcpue_unit_size * num_possible_cpus(); + unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + chunk_size = unit_size * num_possible_cpus(); - pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!pcpue_ptr) { + base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS)); + if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); return -ENOMEM; @@ -1226,33 +1416,18 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, /* return the leftover and copy */ for_each_possible_cpu(cpu) { - void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + void *ptr = base + cpu * unit_size; - free_bootmem(__pa(ptr + pcpue_size), - pcpue_unit_size - pcpue_size); + free_bootmem(__pa(ptr + size_sum), unit_size - size_sum); memcpy(ptr, __per_cpu_load, static_size); } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); + size_sum >> PAGE_SHIFT, base, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, - reserved_size, dyn_size, - pcpue_unit_size, pcpue_ptr); -} - -/* - * 4k page first chunk setup helper. 
- */ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_unit_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_unit_pages) - return pcpu4k_pages[cpu * pcpu4k_unit_pages + pageno]; - return NULL; + return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + unit_size, base); } /** @@ -1279,23 +1454,25 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + int unit_pages; size_t pages_size; + struct page **pages; unsigned int cpu; int i, j; ssize_t ret; - pcpu4k_unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, - PCPU_MIN_UNIT_SIZE)); + unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, + PCPU_MIN_UNIT_SIZE)); /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_unit_pages * num_possible_cpus() * - sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); + pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * + sizeof(pages[0])); + pages = alloc_bootmem(pages_size); /* allocate pages */ j = 0; for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_unit_pages; i++) { + for (i = 0; i < unit_pages; i++) { void *ptr; ptr = alloc_fn(cpu, PAGE_SIZE); @@ -1304,25 +1481,24 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, "4k page for cpu%u\n", cpu); goto enomem; } - pcpu4k_pages[j++] = virt_to_page(ptr); + pages[j++] = virt_to_page(ptr); } /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = num_possible_cpus() * pcpu4k_unit_pages << PAGE_SHIFT; + vm.size = num_possible_cpus() * unit_pages << PAGE_SHIFT; vm_area_register_early(&vm, PAGE_SIZE); for_each_possible_cpu(cpu) { unsigned long unit_addr = (unsigned long)vm.addr + - (cpu * pcpu4k_unit_pages << PAGE_SHIFT); + (cpu * unit_pages << PAGE_SHIFT); - for (i = 0; i < pcpu4k_unit_pages; i++) + for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, - &pcpu4k_pages[cpu * pcpu4k_unit_pages], - pcpu4k_unit_pages); + ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], + unit_pages); if (ret < 0) panic("failed to map percpu area, err=%zd\n", ret); @@ -1340,19 +1516,18 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, /* we're ready, commit */ pr_info("PERCPU: %d 4k pages per cpu, static data %zu bytes\n", - pcpu4k_unit_pages, static_size); + unit_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - reserved_size, -1, - pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); + ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, + unit_pages << PAGE_SHIFT, vm.addr); goto out_free_ar; enomem: while (--j >= 0) - free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); + free_fn(page_address(pages[j]), PAGE_SIZE); ret = -ENOMEM; out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); + free_bootmem(__pa(pages), pages_size); return ret; } @@ -1370,16 +1545,6 @@ static size_t pcpul_unit_size; static struct pcpul_ent *pcpul_map; static struct vm_struct pcpul_vm; -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); -} - /** * pcpu_lpage_first_chunk - remap the first percpu chunk 
using large page * @static_size: the size of static percpu area in bytes @@ -1475,9 +1640,8 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", pcpul_vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - reserved_size, dyn_size, pcpul_unit_size, - pcpul_vm.addr); + ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + pcpul_unit_size, pcpul_vm.addr); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From 2f39e637ea240efb74cf807d31c93a71a0b89174 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: allow non-linear / sparse cpu -> unit mapping Currently cpu and unit are always identity mapped. To allow more efficient large page support on NUMA and lazy allocation for possible but offline cpus, cpu -> unit mapping needs to be non-linear and/or sparse. This can be easily implemented by adding a cpu -> unit mapping array and using it whenever looking up the matching unit for a cpu. The only unusual conversion is in pcpu_chunk_addr_search(). The passed-in address is unit0-based and unit0 might not be in use so it needs to be converted to the address of an in-use unit. This is easily done by adding the unit offset for the current processor. [ Impact: allows non-linear/sparse cpu -> unit mapping, no visible change yet ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 2 +- include/linux/percpu.h | 3 +- mm/percpu.c | 129 +++++++++++++++++++++++++++++++------------ 3 files changed, 97 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f2f22ee97a7a..6970333b48b8 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1516,7 +1516,7 @@ void __init setup_per_cpu_areas(void) pcpu_unit_size = pcpu_setup_first_chunk(static_size, PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr); + PCPU_CHUNK_SIZE, vm.addr, NULL); free_bootmem(__pa(ptrs), ptrs_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 63c8b7a23e66..1e0e8878dc2a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -57,6 +57,7 @@ #endif extern void *pcpu_base_addr; +extern const int *pcpu_unit_map; typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); @@ -66,7 +67,7 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, - void *base_addr); + void *base_addr, const int *unit_map); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, diff --git a/mm/percpu.c b/mm/percpu.c index 21756814d99f..2196fae24f00 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -8,12 +8,13 @@ * * This is percpu allocator which can handle both static and dynamic * areas. Percpu areas are allocated in chunks in vmalloc area. Each - * chunk is consisted of num_possible_cpus() units and the first chunk - * is used for static percpu variables in the kernel image (special - * boot time alloc/init handling necessary as these areas need to be - * brought up before allocation services are running). Unit grows as - * necessary and all units grow or shrink in unison.
When a chunk is - * filled up, another chunk is allocated. ie. in vmalloc area + * chunk is consisted of boot-time determined number of units and the + * first chunk is used for static percpu variables in the kernel image + * (special boot time alloc/init handling necessary as these areas + * need to be brought up before allocation services are running). + * Unit grows as necessary and all units grow or shrink in unison. + * When a chunk is filled up, another chunk is allocated. ie. in + * vmalloc area * * c0 c1 c2 * ------------------- ------------------- ------------ @@ -22,11 +23,13 @@ * * Allocation is done in offset-size areas of single unit space. Ie, * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, - * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring - * percpu base registers pcpu_unit_size apart. + * c1:u1, c1:u2 and c1:u3. On UMA, units corresponds directly to + * cpus. On NUMA, the mapping can be non-linear and even sparse. + * Percpu access can be done by configuring percpu base registers + * according to cpu to unit mapping and pcpu_unit_size. * - * There are usually many small percpu allocations many of them as - * small as 4 bytes. The allocator organizes chunks into lists + * There are usually many small percpu allocations many of them being + * as small as 4 bytes. The allocator organizes chunks into lists * according to free size and tries to allocate from the fullest one. * Each chunk keeps the maximum contiguous area size hint which is * guaranteed to be eqaul to or larger than the maximum contiguous @@ -99,14 +102,22 @@ struct pcpu_chunk { static int pcpu_unit_pages __read_mostly; static int pcpu_unit_size __read_mostly; +static int pcpu_nr_units __read_mostly; static int pcpu_chunk_size __read_mostly; static int pcpu_nr_slots __read_mostly; static size_t pcpu_chunk_struct_size __read_mostly; +/* cpus with the lowest and highest unit numbers */ +static unsigned int pcpu_first_unit_cpu __read_mostly; +static unsigned int pcpu_last_unit_cpu __read_mostly; + /* the address of the first chunk which starts with the kernel static area */ void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); +/* cpu -> unit map */ +const int *pcpu_unit_map __read_mostly; + /* * The first chunk which always exists. Note that unlike other * chunks, this one can be allocated and mapped in several different @@ -177,7 +188,7 @@ static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) static int pcpu_page_idx(unsigned int cpu, int page_idx) { - return cpu * pcpu_unit_pages + page_idx; + return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; } static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, @@ -321,6 +332,14 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) return pcpu_first_chunk; } + /* + * The address is relative to unit0 which might be unused and + * thus unmapped. Offset the address to the unit space of the + * current processor before looking it up in the vmalloc + * space. Note that any possible cpu id can be used here, so + * there's no need to worry about preemption or cpu hotplug. 
+ */ + addr += pcpu_unit_map[smp_processor_id()] * pcpu_unit_size; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } @@ -593,8 +612,7 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, { static struct page **pages; static unsigned long *bitmap; - size_t pages_size = num_possible_cpus() * pcpu_unit_pages * - sizeof(pages[0]); + size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); @@ -692,10 +710,9 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_cache_vunmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) @@ -756,10 +773,9 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk, static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_tlb_kernel_range( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -835,11 +851,9 @@ err: static void pcpu_post_map_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - /* flush at once, please read comments in pcpu_unmap() */ - flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_cache_vmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } /** @@ -953,8 +967,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) bitmap_copy(chunk->populated, populated, pcpu_unit_pages); clear: for_each_possible_cpu(cpu) - memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, - size); + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); return 0; err_unmap: @@ -1088,6 +1101,7 @@ area_found: mutex_unlock(&pcpu_alloc_mutex); + /* return address relative to unit0 */ return __addr_to_pcpu_ptr(chunk->vm->addr + off); fail_unlock: @@ -1222,6 +1236,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE * @base_addr: mapped address + * @unit_map: cpu -> unit map, NULL for sequential mapping * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area @@ -1260,16 +1275,17 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, - void *base_addr) + void *base_addr, const int *unit_map) { static struct vm_struct first_vm; static int smap[2], dmap[2]; size_t size_sum = static_size + reserved_size + (dyn_size >= 0 ? 
dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; + unsigned int cpu, tcpu; int i; - /* santiy checks */ + /* sanity checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); @@ -1278,9 +1294,52 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, BUG_ON(unit_size & ~PAGE_MASK); BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + /* determine number of units and verify and initialize pcpu_unit_map */ + if (unit_map) { + int first_unit = INT_MAX, last_unit = INT_MIN; + + for_each_possible_cpu(cpu) { + int unit = unit_map[cpu]; + + BUG_ON(unit < 0); + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + /* the mapping should be one-to-one */ + BUG_ON(unit_map[tcpu] == unit); + } + + if (unit < first_unit) { + pcpu_first_unit_cpu = cpu; + first_unit = unit; + } + if (unit > last_unit) { + pcpu_last_unit_cpu = cpu; + last_unit = unit; + } + } + pcpu_nr_units = last_unit + 1; + pcpu_unit_map = unit_map; + } else { + int *identity_map; + + /* #units == #cpus, identity mapped */ + identity_map = alloc_bootmem(num_possible_cpus() * + sizeof(identity_map[0])); + + for_each_possible_cpu(cpu) + identity_map[cpu] = cpu; + + pcpu_first_unit_cpu = 0; + pcpu_last_unit_cpu = pcpu_nr_units - 1; + pcpu_nr_units = num_possible_cpus(); + pcpu_unit_map = identity_map; + } + + /* determine basic parameters */ pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; - pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; + pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); @@ -1349,7 +1408,7 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, pcpu_chunk_relocate(pcpu_first_chunk, -1); /* we're done */ - pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); + pcpu_base_addr = schunk->vm->addr; return pcpu_unit_size; } @@ -1427,7 +1486,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, size_sum >> PAGE_SHIFT, base, static_size); return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, base); + unit_size, base, NULL); } /** @@ -1519,7 +1578,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, unit_pages, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, - unit_pages << PAGE_SHIFT, vm.addr); + unit_pages << PAGE_SHIFT, vm.addr, NULL); goto out_free_ar; enomem: @@ -1641,7 +1700,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, "%zu bytes\n", pcpul_vm.addr, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - pcpul_unit_size, pcpul_vm.addr); + pcpul_unit_size, pcpul_vm.addr, NULL); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From a530b7958612bafe2027e21359083dba84f0b3b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: teach large page allocator about NUMA The large page first chunk allocator is primarily used for NUMA machines; however, its NUMA handling is extremely simplistic. Regardless of their proximity, each cpu is put into a separate large page just to return most of the allocated space back, wasting a large amount of vmalloc space and increasing cache footprint. This patch teaches NUMA details to the large page allocator.
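The core of the new approach is a distance-based grouping pass followed by group-by-group unit assignment. A toy userspace model of just those two steps (made-up two-node topology; the real pcpu_lpage_build_unit_map() in the diff below additionally chooses units-per-large-page and pads each group accordingly):

	#include <stdio.h>

	#define NR_CPUS		4
	#define LOCAL_DISTANCE	10
	#define REMOTE_DISTANCE	20

	/* hypothetical topology: cpus 0,1 on node 0, cpus 2,3 on node 1 */
	static const int cpu_node[NR_CPUS] = { 0, 0, 1, 1 };

	static int cpu_distance(int from, int to)
	{
		return cpu_node[from] == cpu_node[to] ? LOCAL_DISTANCE
						      : REMOTE_DISTANCE;
	}

	int main(void)
	{
		int group_map[NR_CPUS] = { 0 };
		int unit_map[NR_CPUS];
		int cpu, tcpu, group, unit = 0, nr_groups = 0;

		/* put cpus which are LOCAL_DISTANCE both ways into one group */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			for (tcpu = 0; tcpu < cpu; tcpu++)
				if (cpu_distance(cpu, tcpu) == LOCAL_DISTANCE &&
				    cpu_distance(tcpu, cpu) == LOCAL_DISTANCE) {
					group_map[cpu] = group_map[tcpu];
					goto grouped;
				}
			group_map[cpu] = nr_groups++;
	grouped:
			;
		}

		/* hand out unit numbers group by group so that cpus sharing
		 * a node (and thus a large page) get adjacent units */
		for (group = 0; group < nr_groups; group++)
			for (cpu = 0; cpu < NR_CPUS; cpu++)
				if (group_map[cpu] == group)
					unit_map[cpu] = unit++;

		for (cpu = 0; cpu < NR_CPUS; cpu++)
			printf("cpu%d -> unit%d (node %d)\n",
			       cpu, unit_map[cpu], cpu_node[cpu]);
		return 0;
	}

With adjacent units backed by the same large page, the two nodes in this model need one large page each instead of one per cpu.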
Given processor proximity information, pcpu_lpage_build_unit_map() will find fitting cpu -> unit mapping in which cpus in LOCAL_DISTANCE share the same large page and not too much virtual address space is wasted. This greatly reduces the unit and thus chunk size and wastes much less address space for the first chunk. For example, on 4/4 NUMA machine, the original code occupied 16MB of virtual space for the first chunk while the new code only uses 4MB - one 2MB page for each node. [ Impact: much better space efficiency on NUMA machines ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Jan Beulich Cc: Andi Kleen Cc: David Miller --- arch/x86/kernel/setup_percpu.c | 72 +++++++-- include/linux/percpu.h | 24 ++- mm/percpu.c | 358 ++++++++++++++++++++++++++++++++--------- 3 files changed, 359 insertions(+), 95 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4f2e0ac9130b..7501bb14bd51 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -149,36 +149,73 @@ static void __init pcpul_map(void *ptr, size_t size, void *addr) set_pmd(pmd, pmd_v); } +static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) +{ + if (early_cpu_to_node(from) == early_cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; + size_t unit_map_size, unit_size; + int *unit_map; + int nr_units; + ssize_t ret; + + /* on non-NUMA, embedding is better */ + if (!chosen && !pcpu_need_numa()) + return -EINVAL; + + /* need PSE */ + if (!cpu_has_pse) { + pr_warning("PERCPU: lpage allocator requires PSE\n"); + return -EINVAL; + } + /* allocate and build unit_map */ + unit_map_size = num_possible_cpus() * sizeof(int); + unit_map = alloc_bootmem_nopanic(unit_map_size); + if (!unit_map) { + pr_warning("PERCPU: failed to allocate unit_map\n"); + return -ENOMEM; + } + + ret = pcpu_lpage_build_unit_map(static_size, + PERCPU_FIRST_CHUNK_RESERVE, + &dyn_size, &unit_size, PMD_SIZE, + unit_map, pcpu_lpage_cpu_distance); + if (ret < 0) { + pr_warning("PERCPU: failed to build unit_map\n"); + goto out_free; + } + nr_units = ret; + + /* do the parameters look okay? 
*/ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = num_possible_cpus() * PMD_SIZE; - - /* on non-NUMA, embedding is better */ - if (!pcpu_need_numa()) - return -EINVAL; + size_t tot_size = nr_units * unit_size; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - return -EINVAL; + ret = -EINVAL; + goto out_free; } } - /* need PSE */ - if (!cpu_has_pse) { - pr_warning("PERCPU: lpage allocator requires PSE\n"); - return -EINVAL; - } - - return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, - PMD_SIZE, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + dyn_size, unit_size, PMD_SIZE, + unit_map, nr_units, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); +out_free: + if (ret < 0) + free_bootmem(__pa(unit_map), unit_map_size); + return ret; } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) @@ -299,7 +336,8 @@ void __init setup_per_cpu_areas(void) /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; + per_cpu_offset(cpu) = + delta + pcpu_unit_map[cpu] * pcpu_unit_size; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1e0e8878dc2a..8ce91af4aa19 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -62,6 +62,7 @@ extern const int *pcpu_unit_map; typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); +typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( @@ -80,18 +81,37 @@ extern ssize_t __init pcpu_4k_first_chunk( pcpu_fc_populate_pte_fn_t populate_pte_fn); #ifdef CONFIG_NEED_MULTIPLE_NODES +extern int __init pcpu_lpage_build_unit_map( + size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn); + extern ssize_t __init pcpu_lpage_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); extern void *pcpu_lpage_remapped(void *kaddr); #else +static inline int pcpu_lpage_build_unit_map( + size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + return -EINVAL; +} + static inline ssize_t __init pcpu_lpage_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) diff --git a/mm/percpu.c b/mm/percpu.c index 2196fae24f00..b3d0bcff8c7c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -59,6 +59,7 @@ #include #include #include 
+#include #include #include #include @@ -1594,75 +1595,259 @@ out_free_ar: * Large page remapping first chunk setup helper */ #ifdef CONFIG_NEED_MULTIPLE_NODES + +/** + * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_sizep: in/out parameter for dynamic size, -1 for auto + * @unit_sizep: out parameter for unit size + * @unit_map: unit_map to be filled + * @cpu_distance_fn: callback to determine distance between cpus + * + * This function builds cpu -> unit map and determine other parameters + * considering needed percpu size, large page size and distances + * between CPUs in NUMA. + * + * CPUs which are of LOCAL_DISTANCE both ways are grouped together and + * may share units in the same large page. The returned configuration + * is guaranteed to have CPUs on different nodes on different large + * pages and >=75% usage of allocated virtual address space. + * + * RETURNS: + * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and + * returns the number of units to be allocated. -errno on failure. + */ +int __init pcpu_lpage_build_unit_map(size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + static int group_map[NR_CPUS] __initdata; + static int group_cnt[NR_CPUS] __initdata; + int group_cnt_max = 0; + size_t size_sum, min_unit_size, alloc_size; + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ + int last_allocs; + unsigned int cpu, tcpu; + int group, unit; + + /* + * Determine min_unit_size, alloc_size and max_upa such that + * alloc_size is multiple of lpage_size and is the smallest + * which can accomodate 4k aligned segments which are equal to + * or larger than min_unit_size. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + + alloc_size = roundup(min_unit_size, lpage_size); + upa = alloc_size / min_unit_size; + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + upa--; + max_upa = upa; + + /* group cpus according to their proximity */ + for_each_possible_cpu(cpu) { + group = 0; + next_group: + for_each_possible_cpu(tcpu) { + if (cpu == tcpu) + break; + if (group_map[tcpu] == group && + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { + group++; + goto next_group; + } + } + group_map[cpu] = group; + group_cnt[group]++; + group_cnt_max = max(group_cnt_max, group_cnt[group]); + } + + /* + * Expand unit size until address space usage goes over 75% + * and then as much as possible without using more address + * space. + */ + last_allocs = INT_MAX; + for (upa = max_upa; upa; upa--) { + int allocs = 0, wasted = 0; + + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + continue; + + for (group = 0; group_cnt[group]; group++) { + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); + allocs += this_allocs; + wasted += this_allocs * upa - group_cnt[group]; + } + + /* + * Don't accept if wastage is over 25%. The + * greater-than comparison ensures upa==1 always + * passes the following check. 
+ */ + if (wasted > num_possible_cpus() / 3) + continue; + + /* and then don't consume more memory */ + if (allocs > last_allocs) + break; + last_allocs = allocs; + best_upa = upa; + } + *unit_sizep = alloc_size / best_upa; + + /* assign units to cpus accordingly */ + unit = 0; + for (group = 0; group_cnt[group]; group++) { + for_each_possible_cpu(cpu) + if (group_map[cpu] == group) + unit_map[cpu] = unit++; + unit = roundup(unit, best_upa); + } + + return unit; /* unit contains aligned number of units */ +} + struct pcpul_ent { - unsigned int cpu; void *ptr; + void *map_addr; }; static size_t pcpul_size; -static size_t pcpul_unit_size; +static size_t pcpul_lpage_size; +static int pcpul_nr_lpages; static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; + +static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, + unsigned int *cpup) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + if (unit_map[cpu] == unit) { + if (cpup) + *cpup = cpu; + return true; + } + + return false; +} + +static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units) +{ + int width = 1, v = nr_units; + char empty_str[] = "--------"; + int upl, lpl; /* units per lpage, lpage per line */ + unsigned int cpu; + int lpage, unit; + + while (v /= 10) + width++; + empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + + upl = max_t(int, lpage_size / unit_size, 1); + lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + + printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, + static_size, reserved_size, dyn_size, unit_size, lpage_size); + + for (lpage = 0, unit = 0; unit < nr_units; unit++) { + if (!(unit % upl)) { + if (!(lpage++ % lpl)) { + printk("\n"); + printk("%spcpu-lpage: ", lvl); + } else + printk("| "); + } + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + printk("%0*d ", width, cpu); + else + printk("%s ", empty_str); + } + printk("\n"); +} /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes + * @unit_size: unit size in bytes * @lpage_size: the size of a large page + * @unit_map: cpu -> unit mapping + * @nr_units: the number of units * @alloc_fn: function to allocate percpu lpage, always called with lpage_size * @free_fn: function to free percpu memory, @size <= lpage_size * @map_fn: function to map percpu lpage, always called with lpage_size * - * This allocator uses large page as unit. A large page is allocated - * for each cpu and each is remapped into vmalloc area using large - * page mapping. As large page can be quite large, only part of it is - * used for the first chunk. Unused part is returned to the bootmem - * allocator. - * - * So, the large pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more large TLB entry pressure but still is - * much better than only using 4k mappings while still being NUMA - * friendly. + * This allocator uses large page to build and map the first chunk. + * Unlike other helpers, the caller should always specify @dyn_size + * and @unit_size. 
These parameters along with @unit_map and + * @nr_units can be determined using pcpu_lpage_build_unit_map(). + * This two stage initialization is to allow arch code to evaluate the + * parameters before committing to it. + * + * Large pages are allocated as directed by @unit_map and other + * parameters and mapped to vmalloc space. Unused holes are returned + * to the page allocator. Note that these holes end up being actively + * mapped twice - once to the physical mapping and to the vmalloc area + * for the first percpu chunk. Depending on architecture, this might + * cause problem when changing page attributes of the returned area. + * These double mapped areas can be detected using + * pcpu_lpage_remapped(). * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { - size_t size_sum; + static struct vm_struct vm; + size_t chunk_size = unit_size * nr_units; size_t map_size; unsigned int cpu; - int i, j; ssize_t ret; + int i, j, unit; - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. - */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, + unit_size, lpage_size, unit_map, nr_units); - pcpul_unit_size = lpage_size; - pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - if (pcpul_size > pcpul_unit_size) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } + BUG_ON(chunk_size % lpage_size); + + pcpul_size = static_size + reserved_size + dyn_size; + pcpul_lpage_size = lpage_size; + pcpul_nr_lpages = chunk_size / lpage_size; /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]); pcpul_map = alloc_bootmem(map_size); - for_each_possible_cpu(cpu) { + /* allocate all pages */ + for (i = 0; i < pcpul_nr_lpages; i++) { + size_t offset = i * lpage_size; + int first_unit = offset / unit_size; + int last_unit = (offset + lpage_size - 1) / unit_size; void *ptr; + /* find out which cpu is mapped to this unit */ + for (unit = first_unit; unit <= last_unit; unit++) + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + goto found; + continue; + found: ptr = alloc_fn(cpu, lpage_size); if (!ptr) { pr_warning("PERCPU: failed to allocate large page " @@ -1670,53 +1855,79 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, goto enomem; } - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The lpage_size up-rounding bootmem is needed - * to make sure the partial lpage is still fully RAM - - * it's not well-specified to have a incompatible area - * (unmapped RAM, device memory, etc.) in that hole. 
- */ - free_fn(ptr + pcpul_size, lpage_size - pcpul_size); - - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = ptr; + pcpul_map[i].ptr = ptr; + } - memcpy(ptr, __per_cpu_load, static_size); + /* return unused holes */ + for (unit = 0; unit < nr_units; unit++) { + size_t start = unit * unit_size; + size_t end = start + unit_size; + size_t off, next; + + /* don't free used part of occupied unit */ + if (pcpul_unit_to_cpu(unit, unit_map, NULL)) + start += pcpul_size; + + /* unit can span more than one page, punch the holes */ + for (off = start; off < end; off = next) { + void *ptr = pcpul_map[off / lpage_size].ptr; + next = min(roundup(off + 1, lpage_size), end); + if (ptr) + free_fn(ptr + off % lpage_size, next - off); + } } - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * pcpul_unit_size; - vm_area_register_early(&pcpul_vm, pcpul_unit_size); + /* allocate address, map and copy */ + vm.flags = VM_ALLOC; + vm.size = chunk_size; + vm_area_register_early(&vm, unit_size); + + for (i = 0; i < pcpul_nr_lpages; i++) { + if (!pcpul_map[i].ptr) + continue; + pcpul_map[i].map_addr = vm.addr + i * lpage_size; + map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr); + } for_each_possible_cpu(cpu) - map_fn(pcpul_map[cpu].ptr, pcpul_unit_size, - pcpul_vm.addr + cpu * pcpul_unit_size); + memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, + static_size); /* we're ready, commit */ pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); + "%zu bytes\n", vm.addr, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - pcpul_unit_size, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } + unit_size, vm.addr, unit_map); + + /* + * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped + * lpages are pushed to the end and trimmed. + */ + for (i = 0; i < pcpul_nr_lpages - 1; i++) + for (j = i + 1; j < pcpul_nr_lpages; j++) { + struct pcpul_ent tmp; + + if (!pcpul_map[j].ptr) + continue; + if (pcpul_map[i].ptr && + pcpul_map[i].ptr < pcpul_map[j].ptr) + continue; + + tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) + pcpul_nr_lpages--; return ret; enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_fn(pcpul_map[cpu].ptr, pcpul_size); + for (i = 0; i < pcpul_nr_lpages; i++) + if (pcpul_map[i].ptr) + free_fn(pcpul_map[i].ptr, lpage_size); free_bootmem(__pa(pcpul_map), map_size); return -ENOMEM; } @@ -1739,10 +1950,10 @@ enomem: */ void *pcpu_lpage_remapped(void *kaddr) { - unsigned long unit_mask = pcpul_unit_size - 1; - void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask); - unsigned long offset = (unsigned long)kaddr & unit_mask; - int left = 0, right = num_possible_cpus() - 1; + unsigned long lpage_mask = pcpul_lpage_size - 1; + void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask); + unsigned long offset = (unsigned long)kaddr & lpage_mask; + int left = 0, right = pcpul_nr_lpages - 1; int pos; /* pcpul in use at all? 
*/ @@ -1757,13 +1968,8 @@ void *pcpu_lpage_remapped(void *kaddr) left = pos + 1; else if (pcpul_map[pos].ptr > lpage_addr) right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * pcpul_unit_size + offset; - } + else + return pcpul_map[pos].map_addr + offset; } return NULL; -- cgit v1.2.3 From 7a6d3c8b3049d07123628f2bf57127bba2cc878f Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Wed, 1 Jul 2009 17:28:41 -0700 Subject: fuse: make the number of max background requests and congestion threshold tunable The practical values for these limits depend on the design of the filesystem server so let userspace set them at initialization time. Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 10 +++++----- fs/fuse/fuse_i.h | 12 ++++++------ fs/fuse/inode.c | 14 ++++++++++++++ include/linux/fuse.h | 9 +++++++-- 4 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f58ecbc416c8..b152761c1bf6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) static void flush_bg_queue(struct fuse_conn *fc) { - while (fc->active_background < FUSE_MAX_BACKGROUND && + while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; @@ -280,11 +280,11 @@ __releases(&fc->lock) list_del(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { - if (fc->num_background == FUSE_MAX_BACKGROUND) { + if (fc->num_background == fc->max_background) { fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, WRITE); @@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, { req->background = 1; fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) + if (fc->num_background == fc->max_background) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->bdi_initialized) { set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, WRITE); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 52b641fc0faf..6bcfab04396f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -25,12 +25,6 @@ /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 -/** Maximum number of outstanding background requests */ -#define FUSE_MAX_BACKGROUND 12 - -/** Congestion starts at 75% of maximum */ -#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) - /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN @@ -349,6 +343,12 @@ struct fuse_conn { /** rbtree of fuse_files waiting for poll events indexed by ph */ struct rb_root polled_files; + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of background requests at which congestion starts */ + unsigned congestion_threshold; + /** Number of requests currently in the background */ unsigned num_background; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f91ccc4a189d..9aa6f46d0c32 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -32,6 +32,12 @@ DEFINE_MUTEX(fuse_mutex); #define 
FUSE_DEFAULT_BLKSIZE 512 +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + +/** Congestion starts at 75% of maximum */ +#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) + struct fuse_mount_data { int fd; unsigned rootmode; @@ -517,6 +523,8 @@ void fuse_conn_init(struct fuse_conn *fc) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); + fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; + fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; fc->reqctr = 0; @@ -736,6 +744,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else { unsigned long ra_pages; + if (arg->minor >= 13) { + if (arg->max_background) + fc->max_background = arg->max_background; + if (arg->congestion_threshold) + fc->congestion_threshold = arg->congestion_threshold; + } if (arg->minor >= 6) { ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) diff --git a/include/linux/fuse.h b/include/linux/fuse.h index cf593bf9fd32..b3700f0ac268 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -30,6 +30,10 @@ * - add umask flag to input argument of open, mknod and mkdir * - add notification messages for invalidation of inodes and * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables */ #ifndef _LINUX_FUSE_H @@ -41,7 +45,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 12 +#define FUSE_KERNEL_MINOR_VERSION 13 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -427,7 +431,8 @@ struct fuse_init_out { __u32 minor; __u32 max_readahead; __u32 flags; - __u32 unused; + __u16 max_background; + __u16 congestion_threshold; __u32 max_write; }; -- cgit v1.2.3 From 37d217f029a56a6d385f99773fb27dfcb51f9a46 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 8 Jul 2009 18:17:58 +0200 Subject: fuse: document protocol version negotiation Clarify how the protocol version should be negotiated between kernel and userspace. Notably libfuse didn't correctly handle the case when the supported major versions didn't match. Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index b3700f0ac268..3e2925a34bf0 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -41,6 +41,26 @@ #include +/* + * Version negotiation: + * + * Both the kernel and userspace send the version they support in the + * INIT request and reply respectively. + * + * If the major versions match then both shall use the smallest + * of the two minor versions for communication. + * + * If the kernel supports a larger major version, then userspace shall + * reply with the major version it supports, ignore the rest of the + * INIT message and expect a new INIT message from the kernel with a + * matching major version. + * + * If the library supports a larger major version, then it shall fall + * back to the major protocol version sent by the kernel for + * communication and reply with that major version (and an arbitrary + * supported minor version). 
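+ *
+ * For example, a 7.13 kernel and a 7.12 library both end up
+ * speaking minor version 12.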
+ */ + /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 -- cgit v1.2.3 From 023bf6f1b8bf58dc4da7f0dc1cf4787b0d5297c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2009 11:27:40 +0900 Subject: linker script: unify usage of discard definition Discarded sections in different archs share some commonality but have considerable differences. This led to linker script for each arch implementing its own /DISCARD/ definition, which makes maintaining tedious and adding new entries error-prone. This patch makes all linker scripts to move discard definitions to the end of the linker script and use the common DISCARDS macro. As ld uses the first matching section definition, archs can include default discarded sections by including them earlier in the linker script. ia64 is notable because it first throws away some ia64 specific subsections and then include the rest of the sections into the final image, so those sections must be discarded before the inclusion. defconfig compile tested for x86, x86-64, powerpc, powerpc64, ia64, alpha, sparc, sparc64 and s390. Michal Simek tested microblaze. Signed-off-by: Tejun Heo Acked-by: Paul Mundt Acked-by: Mike Frysinger Tested-by: Michal Simek Cc: linux-arch@vger.kernel.org Cc: Michal Simek Cc: microblaze-uclinux@itee.uq.edu.au Cc: Sam Ravnborg Cc: Tony Luck --- arch/alpha/kernel/vmlinux.lds.S | 10 ++-------- arch/avr32/kernel/vmlinux.lds.S | 10 +++------- arch/blackfin/kernel/vmlinux.lds.S | 6 +----- arch/cris/kernel/vmlinux.lds.S | 10 ++-------- arch/frv/kernel/vmlinux.lds.S | 2 +- arch/h8300/kernel/vmlinux.lds.S | 6 ++---- arch/ia64/kernel/vmlinux.lds.S | 17 ++++++++--------- arch/m32r/kernel/vmlinux.lds.S | 11 +++-------- arch/m68k/kernel/vmlinux-std.lds | 11 +++-------- arch/m68k/kernel/vmlinux-sun3.lds | 10 ++-------- arch/m68knommu/kernel/vmlinux.lds.S | 8 +------- arch/microblaze/kernel/vmlinux.lds.S | 2 +- arch/mips/kernel/vmlinux.lds.S | 22 ++++++++++------------ arch/mn10300/kernel/vmlinux.lds.S | 9 +++------ arch/parisc/kernel/vmlinux.lds.S | 9 ++++----- arch/powerpc/kernel/vmlinux.lds.S | 10 +++------- arch/s390/kernel/vmlinux.lds.S | 10 +++------- arch/sh/kernel/vmlinux.lds.S | 11 ++++------- arch/sparc/kernel/vmlinux.lds.S | 9 ++------- arch/um/include/asm/common.lds.S | 5 ----- arch/um/kernel/dyn.lds.S | 2 +- arch/um/kernel/uml.lds.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 11 ++++------- arch/xtensa/kernel/vmlinux.lds.S | 14 ++++---------- include/asm-generic/vmlinux.lds.h | 18 ++++++++++++------ 25 files changed, 80 insertions(+), 155 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 75fe1d6877e9..6dc03c35caa0 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -134,14 +134,6 @@ SECTIONS __bss_stop = .; _end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .mdebug 0 : { *(.mdebug) } @@ -151,4 +143,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index b8324608ec0c..c4b56654349a 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -124,15 +124,11 @@ SECTIONS _end = .; } + DWARF_DEBUG + /* When something in the kernel is NOT compiled as a module, the module * cleanup code and data are put into these segments. Both can then be * thrown away, as cleanup code is never called unless it's a module. 
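 *
 * (The DISCARDS macro used here expands to a /DISCARD/ output
 * section covering EXIT_TEXT, EXIT_DATA, EXIT_CALL and
 * *(.discard); see the include/asm-generic/vmlinux.lds.h hunk
 * at the end of this patch.)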
*/ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - - DWARF_DEBUG + DISCARDS } diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 6e8eabd8f0a6..d7ffe299b979 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -277,9 +277,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : - { - *(.exitcall.exit) - *(.discard) - } + DISCARDS } diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index a3175ebb38cc..6c81836b9229 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -140,13 +140,7 @@ SECTIONS _end = .; __end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; + + DISCARDS } diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 64b5a5e4d35e..7dbf41f68b52 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -178,7 +178,7 @@ SECTIONS .comment 0 : { *(.comment) } - /DISCARD/ : { *(.discard) } + DISCARDS } __kernel_image_size_no_bss = __bss_start - __kernel_image_start; diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 03d6c0df33db..662b02ecb86e 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -152,10 +152,6 @@ SECTIONS __end = . ; __ramstart = .; } - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } .romfs : { *(.romfs*) @@ -166,4 +162,6 @@ SECTIONS COMMAND_START = . - 0x200 ; __ramend = . ; } + + DISCARDS } diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 13d958975874..eb4214d1c5af 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -24,15 +24,14 @@ PHDRS { } SECTIONS { - /* Sections to be discarded */ + /* unwind exit sections must be discarded before the rest of the + sections get included. */ /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) - } + *(.comment) + *(.note) + } v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ phys_start = _start - LOAD_OFFSET; @@ -317,7 +316,7 @@ SECTIONS .debug_funcnames 0 : { *(.debug_funcnames) } .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . */ - /DISCARD/ : { *(.comment) } - /DISCARD/ : { *(.note) } + + /* Default discards */ + DISCARDS } diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 480a49944cfd..de5e21cca6a5 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -120,14 +120,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -136,4 +128,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 905a797ada93..47eac19e8f61 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -82,14 +82,6 @@ SECTIONS _end = . 
; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -98,4 +90,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 47d04be322aa..03efaf04d7d7 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -77,14 +77,6 @@ __init_begin = .; _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .crap : { /* Stabs debugging sections. */ *(.stab) @@ -97,4 +89,6 @@ __init_begin = .; *(.note) } + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index 68111a61a77f..2736a5e309c0 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -184,13 +184,6 @@ SECTIONS { __init_end = .; } > INIT - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .bss : { . = ALIGN(4); _sbss = . ; @@ -201,5 +194,6 @@ SECTIONS { _end = . ; } > BSS + DISCARDS } diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index 81bebdcb18fe..ec5fa91a48d8 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -163,5 +163,5 @@ SECTIONS { . = ALIGN(4096); _end = .; - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 45901609b741..1474c18fb777 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -176,18 +176,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - - /* ABI crap starts here */ - *(.MIPS.options) - *(.options) - *(.pdr) - *(.reginfo) - } - /* These mark the ABI of the kernel for debuggers. */ .mdebug.abi32 : { KEEP(*(.mdebug.abi32)) @@ -213,4 +201,14 @@ SECTIONS *(.gptab.bss) *(.gptab.sbss) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { + /* ABI crap starts here */ + *(.MIPS.options) + *(.options) + *(.pdr) + *(.reginfo) + } } diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 5609d4962a55..8fcd0f1e21de 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -115,13 +115,10 @@ SECTIONS . = ALIGN(PAGE_SIZE); pg0 = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_CALL - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index ccf58341845a..aea1784edbd1 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -237,10 +237,12 @@ SECTIONS /* freed after init ends here */ _end = . 
; + STABS_DEBUG + .note 0 : { *(.note) } + /* Sections to be discarded */ + DISCARDS /DISCARD/ : { - *(.exitcall.exit) - *(.discard) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries @@ -253,7 +255,4 @@ SECTIONS *(.gnu.hash) #endif } - - STABS_DEBUG - .note 0 : { *(.note) } } diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7fca9355fd3d..244e3658983c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -37,13 +37,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - EXIT_DATA - } - . = KERNELBASE; /* @@ -299,4 +292,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; PROVIDE32 (end = .); + + /* Sections to be discarded. */ + DISCARDS } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 98867dfea469..82415c75b996 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -157,14 +157,10 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 766976d27b21..0ce254bca92f 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -163,17 +163,14 @@ SECTIONS _end = . ; } + STABS_DEBUG + DWARF_DEBUG + /* * When something in the kernel is NOT compiled as a module, the * module cleanup code and data are put into these segments. Both * can then be thrown away, as cleanup code is never called unless * it's a module. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } - - STABS_DEBUG - DWARF_DEBUG + DISCARDS } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index d63cf914667d..866390feb683 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -171,13 +171,8 @@ SECTIONS } _end = . 
; - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index cb0248616d49..37ecc5577a9a 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -123,8 +123,3 @@ __initramfs_end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - } - diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 2916d6eadffd..715a188c0472 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -157,5 +157,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 1f8a622cabe1..2ebd39765db8 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -101,5 +101,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 367e87882041..b600c843710b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -387,15 +387,12 @@ SECTIONS _end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.eh_frame) } } diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index b1e24638acd7..921b6ff3b645 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -280,16 +280,6 @@ SECTIONS *(.ResetVector.text) } - /* Sections to be discarded */ - /DISCARD/ : - { - *(.exit.literal) - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .xt.lit : { *(.xt.lit) } .xt.prop : { *(.xt.prop) } @@ -322,4 +312,8 @@ SECTIONS *(.xt.lit) *(.gnu.linkonce.p*) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.exit.literal) } } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c5c18ac878ab..ab8ea9b7741e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -35,13 +35,10 @@ * __bss_stop = .; * _end = .; * - * /DISCARD/ : { - * EXIT_TEXT - * EXIT_DATA - * EXIT_CALL - * } * STABS_DEBUG * DWARF_DEBUG + * + * DISCARDS // must be the last * } * * [__init_begin, __init_end] is the init section that may be freed after init @@ -629,11 +626,20 @@ #define INIT_RAM_FS #endif +/* + * Default discarded sections. + * + * Some archs want to discard exit text/data at runtime rather than + * link time due to cross-section references such as alt instructions, + * bug table, eh_frame, etc. DISCARDS must be the last of output + * section definitions so that such archs put those in earlier section + * definitions. + */ #define DISCARDS \ /DISCARD/ : { \ EXIT_TEXT \ EXIT_DATA \ - *(.exitcall.exit) \ + EXIT_CALL \ *(.discard) \ } -- cgit v1.2.3 From 5a421ce3c062a87db0a9e7f2a0a7ee0a5b869aab Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 10 Jul 2009 12:37:40 +0300 Subject: nfsd41: gather and report statistics also for v4.1 ops Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields --- include/linux/nfs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index bd2eba530667..aff924a24abb 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -234,7 +234,7 @@ enum nfs_opnum4 { Needs to be updated if more operations are defined in future.*/ #define FIRST_NFS4_OP OP_ACCESS -#define LAST_NFS4_OP OP_RELEASE_LOCKOWNER +#define LAST_NFS4_OP OP_RECLAIM_COMPLETE enum nfsstat4 { NFS4_OK = 0, -- cgit v1.2.3 From 4bd9b0f4afc76cf972578c702e1bc1b6f2d10ba5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 24 Jun 2009 15:37:45 -0400 Subject: nfsd41: use globals for DRC limits The version 4.1 DRC memory limit and tracking variables are server wide and session specific. Replace struct svc_serv fields with globals. Stop using the svc_serv sv_lock. Add a spinlock to serialize access to the DRC limit management variables which change on session creation and deletion (usage counter) or (future) administrative action to adjust the total DRC memory limit. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy --- fs/nfsd/nfs4state.c | 10 +++++----- fs/nfsd/nfssvc.c | 19 +++++++++++++++---- include/linux/nfsd/nfsd.h | 3 +++ include/linux/sunrpc/svc.h | 2 -- 4 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 980a216a48c8..2e6a44e3d2fe 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -430,11 +430,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - spin_lock(&nfsd_serv->sv_lock); - if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) - np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; - nfsd_serv->sv_drc_pages_used += np; - spin_unlock(&nfsd_serv->sv_lock); + spin_lock(&nfsd_drc_lock); + if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) + np = nfsd_drc_max_pages - nfsd_drc_pages_used; + nfsd_drc_pages_used += np; + spin_unlock(&nfsd_drc_lock); if (np <= 0) { status = nfserr_resource; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54b..78d8fcd883fb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -67,6 +67,16 @@ struct timeval nfssvc_boot; DEFINE_MUTEX(nfsd_mutex); struct svc_serv *nfsd_serv; +/* + * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. + * nfsd_drc_max_pages limits the total amount of memory available for + * version 4.1 DRC caches. + * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
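+ *
+ * set_max_drc() below initializes nfsd_drc_max_pages to
+ * nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT, i.e. 1/128th
+ * of the free buffer pages.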
+ */ +spinlock_t nfsd_drc_lock; +unsigned int nfsd_drc_max_pages; +unsigned int nfsd_drc_pages_used; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static struct svc_version * nfsd_acl_version[] = { @@ -238,11 +248,12 @@ static void set_max_drc(void) { /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ #define NFSD_DRC_SIZE_SHIFT 7 - nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + nfsd_drc_max_pages = nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT; - nfsd_serv->sv_drc_pages_used = 0; - dprintk("%s svc_drc_max_pages %u\n", __func__, - nfsd_serv->sv_drc_max_pages); + nfsd_drc_pages_used = 0; + spin_lock_init(&nfsd_drc_lock); + dprintk("%s nfsd_drc_max_pages %u\n", __func__, + nfsd_drc_max_pages); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2b49d676d0c9..2571f856908f 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -56,6 +56,9 @@ extern struct svc_version nfsd_version2, nfsd_version3, extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; +extern spinlock_t nfsd_drc_lock; +extern unsigned int nfsd_drc_max_pages; +extern unsigned int nfsd_drc_pages_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ea8009695c69..52e8cb0a7569 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -94,8 +94,6 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ - unsigned int sv_drc_max_pages; /* Total pages for DRC */ - unsigned int sv_drc_pages_used;/* DRC pages used */ #if defined(CONFIG_NFS_V4_1) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same -- cgit v1.2.3 From d782c3f95c9263dc0b98e7115f75f1e18b9600b3 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 22 Jun 2009 13:17:08 +0800 Subject: drm/mode: add the CVT algorithm in kernel space Add the CVT algorithm in kernel space. And this function can be called to generate the required modeline. I copied it from the file of xserver/hw/xfree86/modes/xf86cvt.c. What I have done is to translate it by using integer calculation. This is to avoid the float-point calculation in kernel space. 
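A minimal usage sketch, assuming a valid struct drm_device *dev (the
resolution and refresh rate are chosen arbitrarily for illustration):

	struct drm_display_mode *mode;

	/* 1024x768 at 60Hz, normal blanking, progressive scan */
	mode = drm_cvt_mode(dev, 1024, 768, 60, false, false);
	if (mode)
		drm_mode_debug_printmodeline(mode);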
[airlied:- cleaned up some bits] Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 219 ++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 3 + 2 files changed, 222 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 54f492a488a9..0dbc7e4f8643 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -8,6 +8,7 @@ * Copyright © 2007 Dave Airlie * Copyright © 2007-2008 Intel Corporation * Jesse Barnes + * Copyright 2005-2006 Luc Verhaegen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -61,6 +62,224 @@ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) } EXPORT_SYMBOL(drm_mode_debug_printmodeline); +/** + * drm_cvt_mode -create a modeline based on CVT algorithm + * @dev: DRM device + * @hdisplay: hdisplay size + * @vdisplay: vdisplay size + * @vrefresh : vrefresh rate + * @reduced : Whether the GTF calculation is simplified + * @interlaced:Whether the interlace is supported + * + * LOCKING: + * none. + * + * return the modeline based on CVT algorithm + * + * This function is called to generate the modeline based on CVT algorithm + * according to the hdisplay, vdisplay, vrefresh. + * It is based from the VESA(TM) Coordinated Video Timing Generator by + * Graham Loveridge April 9, 2003 available at + * http://www.vesa.org/public/CVT/CVTd6r1.xls + * + * And it is copied from xf86CVTmode in xserver/hw/xfree86/modes/xf86cvt.c. + * What I have done is to translate it by using integer calculation. + */ +#define HV_FACTOR 1000 +struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, + int vdisplay, int vrefresh, + bool reduced, bool interlaced) +{ + /* 1) top/bottom margin size (% of height) - default: 1.8, */ +#define CVT_MARGIN_PERCENTAGE 18 + /* 2) character cell horizontal granularity (pixels) - default 8 */ +#define CVT_H_GRANULARITY 8 + /* 3) Minimum vertical porch (lines) - default 3 */ +#define CVT_MIN_V_PORCH 3 + /* 4) Minimum number of vertical back porch lines - default 6 */ +#define CVT_MIN_V_BPORCH 6 + /* Pixel Clock step (kHz) */ +#define CVT_CLOCK_STEP 250 + struct drm_display_mode *drm_mode; + bool margins = false; + unsigned int vfieldrate, hperiod; + int hdisplay_rnd, hmargin, vdisplay_rnd, vmargin, vsync; + int interlace; + + /* allocate the drm_display_mode structure. 
If failure, we will + * return directly + */ + drm_mode = drm_mode_create(dev); + if (!drm_mode) + return NULL; + + /* the CVT default refresh rate is 60Hz */ + if (!vrefresh) + vrefresh = 60; + + /* the required field fresh rate */ + if (interlaced) + vfieldrate = vrefresh * 2; + else + vfieldrate = vrefresh; + + /* horizontal pixels */ + hdisplay_rnd = hdisplay - (hdisplay % CVT_H_GRANULARITY); + + /* determine the left&right borders */ + hmargin = 0; + if (margins) { + hmargin = hdisplay_rnd * CVT_MARGIN_PERCENTAGE / 1000; + hmargin -= hmargin % CVT_H_GRANULARITY; + } + /* find the total active pixels */ + drm_mode->hdisplay = hdisplay_rnd + 2 * hmargin; + + /* find the number of lines per field */ + if (interlaced) + vdisplay_rnd = vdisplay / 2; + else + vdisplay_rnd = vdisplay; + + /* find the top & bottom borders */ + vmargin = 0; + if (margins) + vmargin = vdisplay_rnd * CVT_MARGIN_PERCENTAGE / 1000; + + drm_mode->vdisplay = vdisplay_rnd + 2 * vmargin; + + /* Interlaced */ + if (interlaced) + interlace = 1; + else + interlace = 0; + + /* Determine VSync Width from aspect ratio */ + if (!(vdisplay % 3) && ((vdisplay * 4 / 3) == hdisplay)) + vsync = 4; + else if (!(vdisplay % 9) && ((vdisplay * 16 / 9) == hdisplay)) + vsync = 5; + else if (!(vdisplay % 10) && ((vdisplay * 16 / 10) == hdisplay)) + vsync = 6; + else if (!(vdisplay % 4) && ((vdisplay * 5 / 4) == hdisplay)) + vsync = 7; + else if (!(vdisplay % 9) && ((vdisplay * 15 / 9) == hdisplay)) + vsync = 7; + else /* custom */ + vsync = 10; + + if (!reduced) { + /* simplify the GTF calculation */ + /* 4) Minimum time of vertical sync + back porch interval (µs) + * default 550.0 + */ + int tmp1, tmp2; +#define CVT_MIN_VSYNC_BP 550 + /* 3) Nominal HSync width (% of line period) - default 8 */ +#define CVT_HSYNC_PERCENTAGE 8 + unsigned int hblank_percentage; + int vsyncandback_porch, vback_porch, hblank; + + /* estimated the horizontal period */ + tmp1 = HV_FACTOR * 1000000 - + CVT_MIN_VSYNC_BP * HV_FACTOR * vfieldrate; + tmp2 = (vdisplay_rnd + 2 * vmargin + CVT_MIN_V_PORCH) * 2 + + interlace; + hperiod = tmp1 * 2 / (tmp2 * vfieldrate); + + tmp1 = CVT_MIN_VSYNC_BP * HV_FACTOR / hperiod + 1; + /* 9. Find number of lines in sync + backporch */ + if (tmp1 < (vsync + CVT_MIN_V_PORCH)) + vsyncandback_porch = vsync + CVT_MIN_V_PORCH; + else + vsyncandback_porch = tmp1; + /* 10. Find number of lines in back porch */ + vback_porch = vsyncandback_porch - vsync; + drm_mode->vtotal = vdisplay_rnd + 2 * vmargin + + vsyncandback_porch + CVT_MIN_V_PORCH; + /* 5) Definition of Horizontal blanking time limitation */ + /* Gradient (%/kHz) - default 600 */ +#define CVT_M_FACTOR 600 + /* Offset (%) - default 40 */ +#define CVT_C_FACTOR 40 + /* Blanking time scaling factor - default 128 */ +#define CVT_K_FACTOR 128 + /* Scaling factor weighting - default 20 */ +#define CVT_J_FACTOR 20 +#define CVT_M_PRIME (CVT_M_FACTOR * CVT_K_FACTOR / 256) +#define CVT_C_PRIME ((CVT_C_FACTOR - CVT_J_FACTOR) * CVT_K_FACTOR / 256 + \ + CVT_J_FACTOR) + /* 12. Find ideal blanking duty cycle from formula */ + hblank_percentage = CVT_C_PRIME * HV_FACTOR - CVT_M_PRIME * + hperiod / 1000; + /* 13. Blanking time */ + if (hblank_percentage < 20 * HV_FACTOR) + hblank_percentage = 20 * HV_FACTOR; + hblank = drm_mode->hdisplay * hblank_percentage / + (100 * HV_FACTOR - hblank_percentage); + hblank -= hblank % (2 * CVT_H_GRANULARITY); + /* 14. 
find the total pixes per line */ + drm_mode->htotal = drm_mode->hdisplay + hblank; + drm_mode->hsync_end = drm_mode->hdisplay + hblank / 2; + drm_mode->hsync_start = drm_mode->hsync_end - + (drm_mode->htotal * CVT_HSYNC_PERCENTAGE) / 100; + drm_mode->hsync_start += CVT_H_GRANULARITY - + drm_mode->hsync_start % CVT_H_GRANULARITY; + /* fill the Vsync values */ + drm_mode->vsync_start = drm_mode->vdisplay + CVT_MIN_V_PORCH; + drm_mode->vsync_end = drm_mode->vsync_start + vsync; + } else { + /* Reduced blanking */ + /* Minimum vertical blanking interval time (µs)- default 460 */ +#define CVT_RB_MIN_VBLANK 460 + /* Fixed number of clocks for horizontal sync */ +#define CVT_RB_H_SYNC 32 + /* Fixed number of clocks for horizontal blanking */ +#define CVT_RB_H_BLANK 160 + /* Fixed number of lines for vertical front porch - default 3*/ +#define CVT_RB_VFPORCH 3 + int vbilines; + int tmp1, tmp2; + /* 8. Estimate Horizontal period. */ + tmp1 = HV_FACTOR * 1000000 - + CVT_RB_MIN_VBLANK * HV_FACTOR * vfieldrate; + tmp2 = vdisplay_rnd + 2 * vmargin; + hperiod = tmp1 / (tmp2 * vfieldrate); + /* 9. Find number of lines in vertical blanking */ + vbilines = CVT_RB_MIN_VBLANK * HV_FACTOR / hperiod + 1; + /* 10. Check if vertical blanking is sufficient */ + if (vbilines < (CVT_RB_VFPORCH + vsync + CVT_MIN_V_BPORCH)) + vbilines = CVT_RB_VFPORCH + vsync + CVT_MIN_V_BPORCH; + /* 11. Find total number of lines in vertical field */ + drm_mode->vtotal = vdisplay_rnd + 2 * vmargin + vbilines; + /* 12. Find total number of pixels in a line */ + drm_mode->htotal = drm_mode->hdisplay + CVT_RB_H_BLANK; + /* Fill in HSync values */ + drm_mode->hsync_end = drm_mode->hdisplay + CVT_RB_H_BLANK / 2; + drm_mode->hsync_start = drm_mode->hsync_end = CVT_RB_H_SYNC; + } + /* 15/13. Find pixel clock frequency (kHz for xf86) */ + drm_mode->clock = drm_mode->htotal * HV_FACTOR * 1000 / hperiod; + drm_mode->clock -= drm_mode->clock % CVT_CLOCK_STEP; + /* 18/16. Find actual vertical frame frequency */ + /* ignore - just set the mode flag for interlaced */ + if (interlaced) + drm_mode->vtotal *= 2; + /* Fill the mode line name */ + drm_mode_set_name(drm_mode); + if (reduced) + drm_mode->flags |= (DRM_MODE_FLAG_PHSYNC | + DRM_MODE_FLAG_NVSYNC); + else + drm_mode->flags |= (DRM_MODE_FLAG_PVSYNC | + DRM_MODE_FLAG_NHSYNC); + if (interlaced) + drm_mode->flags |= DRM_MODE_FLAG_INTERLACE; + + return drm_mode; +} +EXPORT_SYMBOL(drm_cvt_mode); + /** * drm_mode_set_name - set the name on a mode * @mode: name will be set in this mode diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 7300fb866767..820bc0977e5e 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -736,4 +736,7 @@ extern int drm_mode_gamma_get_ioctl(struct drm_device *dev, extern int drm_mode_gamma_set_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern bool drm_detect_hdmi_monitor(struct edid *edid); +extern struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, + int hdisplay, int vdisplay, int vrefresh, + bool reduced, bool interlaced); #endif /* __DRM_CRTC_H__ */ -- cgit v1.2.3 From 26bbdadad356ec02d33657858d91675f3e9aca94 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 22 Jun 2009 13:17:09 +0800 Subject: drm/mode: add the GTF algorithm in kernel space Add the GTF algorithm in kernel space. And this function can be called to generate the required modeline. I copied it from the file of xserver/hw/xfree86/modes/xf86gtf.c. What I have done is to translate it by using integer calculation. 
This is to avoid the float-point calculation in kernel space. At the same tie I also refer to the function of fb_get_mode in drivers/video/fbmon.c Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 197 ++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 3 + 2 files changed, 200 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 0dbc7e4f8643..fd489d76fbbc 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -9,6 +9,7 @@ * Copyright © 2007-2008 Intel Corporation * Jesse Barnes * Copyright 2005-2006 Luc Verhaegen + * Copyright (c) 2001, Andy Ritger aritger@nvidia.com * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -280,6 +281,202 @@ struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, } EXPORT_SYMBOL(drm_cvt_mode); +/** + * drm_gtf_mode - create the modeline based on GTF algorithm + * + * @dev :drm device + * @hdisplay :hdisplay size + * @vdisplay :vdisplay size + * @vrefresh :vrefresh rate. + * @interlaced :whether the interlace is supported + * @margins :whether the margin is supported + * + * LOCKING. + * none. + * + * return the modeline based on GTF algorithm + * + * This function is to create the modeline based on the GTF algorithm. + * Generalized Timing Formula is derived from: + * GTF Spreadsheet by Andy Morrish (1/5/97) + * available at http://www.vesa.org + * + * And it is copied from the file of xserver/hw/xfree86/modes/xf86gtf.c. + * What I have done is to translate it by using integer calculation. + * I also refer to the function of fb_get_mode in the file of + * drivers/video/fbmon.c + */ +struct drm_display_mode *drm_gtf_mode(struct drm_device *dev, int hdisplay, + int vdisplay, int vrefresh, + bool interlaced, int margins) +{ + /* 1) top/bottom margin size (% of height) - default: 1.8, */ +#define GTF_MARGIN_PERCENTAGE 18 + /* 2) character cell horizontal granularity (pixels) - default 8 */ +#define GTF_CELL_GRAN 8 + /* 3) Minimum vertical porch (lines) - default 3 */ +#define GTF_MIN_V_PORCH 1 + /* width of vsync in lines */ +#define V_SYNC_RQD 3 + /* width of hsync as % of total line */ +#define H_SYNC_PERCENT 8 + /* min time of vsync + back porch (microsec) */ +#define MIN_VSYNC_PLUS_BP 550 + /* blanking formula gradient */ +#define GTF_M 600 + /* blanking formula offset */ +#define GTF_C 40 + /* blanking formula scaling factor */ +#define GTF_K 128 + /* blanking formula scaling factor */ +#define GTF_J 20 + /* C' and M' are part of the Blanking Duty Cycle computation */ +#define GTF_C_PRIME (((GTF_C - GTF_J) * GTF_K / 256) + GTF_J) +#define GTF_M_PRIME (GTF_K * GTF_M / 256) + struct drm_display_mode *drm_mode; + unsigned int hdisplay_rnd, vdisplay_rnd, vfieldrate_rqd; + int top_margin, bottom_margin; + int interlace; + unsigned int hfreq_est; + int vsync_plus_bp, vback_porch; + unsigned int vtotal_lines, vfieldrate_est, hperiod; + unsigned int vfield_rate, vframe_rate; + int left_margin, right_margin; + unsigned int total_active_pixels, ideal_duty_cycle; + unsigned int hblank, total_pixels, pixel_freq; + int hsync, hfront_porch, vodd_front_porch_lines; + unsigned int tmp1, tmp2; + + drm_mode = drm_mode_create(dev); + if (!drm_mode) + return NULL; + + /* 1. 
In order to give correct results, the number of horizontal + * pixels requested is first processed to ensure that it is divisible + * by the character size, by rounding it to the nearest character + * cell boundary: + */ + hdisplay_rnd = (hdisplay + GTF_CELL_GRAN / 2) / GTF_CELL_GRAN; + hdisplay_rnd = hdisplay_rnd * GTF_CELL_GRAN; + + /* 2. If interlace is requested, the number of vertical lines assumed + * by the calculation must be halved, as the computation calculates + * the number of vertical lines per field. + */ + if (interlaced) + vdisplay_rnd = vdisplay / 2; + else + vdisplay_rnd = vdisplay; + + /* 3. Find the frame rate required: */ + if (interlaced) + vfieldrate_rqd = vrefresh * 2; + else + vfieldrate_rqd = vrefresh; + + /* 4. Find number of lines in Top margin: */ + top_margin = 0; + if (margins) + top_margin = (vdisplay_rnd * GTF_MARGIN_PERCENTAGE + 500) / + 1000; + /* 5. Find number of lines in bottom margin: */ + bottom_margin = top_margin; + + /* 6. If interlace is required, then set variable interlace: */ + if (interlaced) + interlace = 1; + else + interlace = 0; + + /* 7. Estimate the Horizontal frequency */ + { + tmp1 = (1000000 - MIN_VSYNC_PLUS_BP * vfieldrate_rqd) / 500; + tmp2 = (vdisplay_rnd + 2 * top_margin + GTF_MIN_V_PORCH) * + 2 + interlace; + hfreq_est = (tmp2 * 1000 * vfieldrate_rqd) / tmp1; + } + + /* 8. Find the number of lines in V sync + back porch */ + /* [V SYNC+BP] = RINT(([MIN VSYNC+BP] * hfreq_est / 1000000)) */ + vsync_plus_bp = MIN_VSYNC_PLUS_BP * hfreq_est / 1000; + vsync_plus_bp = (vsync_plus_bp + 500) / 1000; + /* 9. Find the number of lines in V back porch alone: */ + vback_porch = vsync_plus_bp - V_SYNC_RQD; + /* 10. Find the total number of lines in Vertical field period: */ + vtotal_lines = vdisplay_rnd + top_margin + bottom_margin + + vsync_plus_bp + GTF_MIN_V_PORCH; + /* 11. Estimate the Vertical field frequency: */ + vfieldrate_est = hfreq_est / vtotal_lines; + /* 12. Find the actual horizontal period: */ + hperiod = 1000000 / (vfieldrate_rqd * vtotal_lines); + + /* 13. Find the actual Vertical field frequency: */ + vfield_rate = hfreq_est / vtotal_lines; + /* 14. Find the Vertical frame frequency: */ + if (interlaced) + vframe_rate = vfield_rate / 2; + else + vframe_rate = vfield_rate; + /* 15. Find number of pixels in left margin: */ + if (margins) + left_margin = (hdisplay_rnd * GTF_MARGIN_PERCENTAGE + 500) / + 1000; + else + left_margin = 0; + + /* 16.Find number of pixels in right margin: */ + right_margin = left_margin; + /* 17.Find total number of active pixels in image and left and right */ + total_active_pixels = hdisplay_rnd + left_margin + right_margin; + /* 18.Find the ideal blanking duty cycle from blanking duty cycle */ + ideal_duty_cycle = GTF_C_PRIME * 1000 - + (GTF_M_PRIME * 1000000 / hfreq_est); + /* 19.Find the number of pixels in the blanking time to the nearest + * double character cell: */ + hblank = total_active_pixels * ideal_duty_cycle / + (100000 - ideal_duty_cycle); + hblank = (hblank + GTF_CELL_GRAN) / (2 * GTF_CELL_GRAN); + hblank = hblank * 2 * GTF_CELL_GRAN; + /* 20.Find total number of pixels: */ + total_pixels = total_active_pixels + hblank; + /* 21.Find pixel clock frequency: */ + pixel_freq = total_pixels * hfreq_est / 1000; + /* Stage 1 computations are now complete; I should really pass + * the results to another function and do the Stage 2 computations, + * but I only need a few more values so I'll just append the + * computations here for now */ + /* 17. 
Find the number of pixels in the horizontal sync period: */ + hsync = H_SYNC_PERCENT * total_pixels / 100; + hsync = (hsync + GTF_CELL_GRAN / 2) / GTF_CELL_GRAN; + hsync = hsync * GTF_CELL_GRAN; + /* 18. Find the number of pixels in horizontal front porch period */ + hfront_porch = hblank / 2 - hsync; + /* 36. Find the number of lines in the odd front porch period: */ + vodd_front_porch_lines = GTF_MIN_V_PORCH ; + + /* finally, pack the results in the mode struct */ + drm_mode->hdisplay = hdisplay_rnd; + drm_mode->hsync_start = hdisplay_rnd + hfront_porch; + drm_mode->hsync_end = drm_mode->hsync_start + hsync; + drm_mode->htotal = total_pixels; + drm_mode->vdisplay = vdisplay_rnd; + drm_mode->vsync_start = vdisplay_rnd + vodd_front_porch_lines; + drm_mode->vsync_end = drm_mode->vsync_start + V_SYNC_RQD; + drm_mode->vtotal = vtotal_lines; + + drm_mode->clock = pixel_freq; + + drm_mode_set_name(drm_mode); + drm_mode->flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC; + + if (interlaced) { + drm_mode->vtotal *= 2; + drm_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } + + return drm_mode; +} +EXPORT_SYMBOL(drm_gtf_mode); /** * drm_mode_set_name - set the name on a mode * @mode: name will be set in this mode diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 820bc0977e5e..125994d8ac0b 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -739,4 +739,7 @@ extern bool drm_detect_hdmi_monitor(struct edid *edid); extern struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, int vdisplay, int vrefresh, bool reduced, bool interlaced); +extern struct drm_display_mode *drm_gtf_mode(struct drm_device *dev, + int hdisplay, int vdisplay, int vrefresh, + bool interlaced, int margins); #endif /* __DRM_CRTC_H__ */ -- cgit v1.2.3 From 3d39cecc4841e8d4c4abdb401d10180f5faaded0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 8 Jul 2009 15:23:30 +0100 Subject: intel-iommu: Remove superfluous iova_alloc_lock from IOVA code We only ever obtain this lock immediately before the iova_rbtree_lock, and release it immediately after the iova_rbtree_lock. So ditch it and just use iova_rbtree_lock. 
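In sketch form, the locking collapses from a nested pair into a single
irqsave acquisition (distilled from the hunks below):

	/* before */
	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
	spin_lock(&iovad->iova_rbtree_lock);
	/* ... walk/update the iova rbtree ... */
	spin_unlock(&iovad->iova_rbtree_lock);
	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);

	/* after */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	/* ... walk/update the iova rbtree ... */
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);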
[v2: Remove the lockdep bits this time too] Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 --- drivers/pci/iova.c | 16 ++++------------ include/linux/iova.h | 1 - 3 files changed, 4 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c5f7c73cbb55..d6a857397ec3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1309,7 +1309,6 @@ static void iommu_detach_domain(struct dmar_domain *domain, } static struct iova_domain reserved_iova_list; -static struct lock_class_key reserved_alloc_key; static struct lock_class_key reserved_rbtree_key; static void dmar_init_reserved_ranges(void) @@ -1320,8 +1319,6 @@ static void dmar_init_reserved_ranges(void) init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); - lockdep_set_class(&reserved_iova_list.iova_alloc_lock, - &reserved_alloc_key); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 46dd440e2315..7914951ef29a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -22,7 +22,6 @@ void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) { - spin_lock_init(&iovad->iova_alloc_lock); spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; @@ -205,7 +204,6 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned) { - unsigned long flags; struct iova *new_iova; int ret; @@ -219,11 +217,9 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (size_aligned) size = __roundup_pow_of_two(size); - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, new_iova, size_aligned); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); if (ret) { free_iova_mem(new_iova); return NULL; @@ -381,8 +377,7 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); - spin_lock(&iovad->iova_rbtree_lock); + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { iova = container_of(node, struct iova, node); @@ -402,8 +397,7 @@ reserve_iova(struct iova_domain *iovad, iova = __insert_new_range(iovad, pfn_lo, pfn_hi); finish: - spin_unlock(&iovad->iova_rbtree_lock); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return iova; } @@ -420,8 +414,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) unsigned long flags; struct rb_node *node; - spin_lock_irqsave(&from->iova_alloc_lock, flags); - spin_lock(&from->iova_rbtree_lock); + spin_lock_irqsave(&from->iova_rbtree_lock, flags); for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { struct iova *iova = container_of(node, struct iova, node); struct iova *new_iova; @@ -430,6 +423,5 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", iova->pfn_lo, iova->pfn_lo); } - spin_unlock(&from->iova_rbtree_lock); - spin_unlock_irqrestore(&from->iova_alloc_lock, flags); + spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } diff --git a/include/linux/iova.h b/include/linux/iova.h index 228f6c94b69c..76a0759e88ec 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,7 +28,6 @@ struct iova { 
/* holds all the iova translations for a domain */
 struct iova_domain {
-	spinlock_t	iova_alloc_lock;/* Lock to protect iova allocation */
 	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
 	struct rb_root	rbroot;		/* iova domain rbtree root */
 	struct rb_node	*cached32_node; /* Save last alloced node */
-- cgit v1.2.3

From a76761b621bcd8336065c4fe3a74f046858bc34c Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 15 Jul 2009 23:35:14 +0900
Subject: percpu: add dummy pcpu_lpage_remapped() for !CONFIG_SMP

!CONFIG_SMP was missing a pcpu_lpage_remapped() definition, causing a
build failure. Add a dummy implementation. This was discovered by
linux-next testing.

Signed-off-by: Tejun Heo
Cc: Randy Dunlap
Cc: Kamalesh Babulal
Cc: Stephen Rothwell
---
 include/linux/percpu.h | 5 +++++
 1 file changed, 5 insertions(+)
(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 8ce91af4aa19..e134c8229631 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -184,6 +184,11 @@ static inline void free_percpu(void *p)
 static inline void __init setup_per_cpu_areas(void) { }
 
+static inline void *pcpu_lpage_remapped(void *kaddr)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_SMP */
 
 #define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type), \
-- cgit v1.2.3

From 719a72b7c75bb239ca6184190ab994b71a31c6dc Mon Sep 17 00:00:00 2001
From: Magnus Damm
Date: Fri, 17 Jul 2009 14:59:55 +0000
Subject: usb: r8a66597-hcd platform data on_chip support

Convert the r8a66597-hcd driver to use the on_chip flag from platform
data to enable on-chip behaviour instead of relying on
CONFIG_SUPERH_ON_CHIP_R8A66597 ugliness.

This makes the code cleaner and also allows us to support both external
and internal r8a66597 with the same kernel.

It also makes the Kconfig part more future-proof, since with this patch
we can add support for new processors with an on-chip r8a66597 without
modifying the Kconfig.
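[Editor's note: for illustration only -- the sketch below is not part of the patch. It shows the board-file pattern the on_chip flag enables. The r8a66597_platdata fields come from the patch; the platform device name, id and resource details are assumed.]

#include <linux/platform_device.h>
#include <linux/usb/r8a66597.h>

/* On-chip controller (e.g. SH7724): only the on_chip flag is set;
 * the USB clock is handled through the SoC clock framework. */
static struct r8a66597_platdata onchip_usb_host_data = {
	.on_chip = 1,
};

/* External controller: on_chip stays zero and the crystal rate is
 * described instead (assuming a 24MHz part here). */
static struct r8a66597_platdata external_usb_host_data = {
	.xtal = R8A66597_PLATDATA_XTAL_24MHZ,
};

static struct platform_device usb_host_device = {
	.name = "r8a66597_hcd",	/* driver name, assumed */
	.id = 0,
	.dev = {
		.platform_data = &onchip_usb_host_data,
	},
	/* MMIO window and IRQ resources omitted for brevity */
};

Either platdata binds the same driver; probe() then selects the code path from pdata->on_chip at runtime instead of via a compile-time #ifdef.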
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-se/7724/setup.c | 1 + arch/sh/kernel/cpu/sh4a/setup-sh7366.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7723.c | 2 +- drivers/usb/host/Kconfig | 7 -- drivers/usb/host/r8a66597-hcd.c | 187 +++++++++++++++++++-------------- drivers/usb/host/r8a66597.h | 76 ++++++-------- include/linux/usb/r8a66597.h | 3 + 7 files changed, 147 insertions(+), 131 deletions(-) (limited to 'include') diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 8fed45a2fb85..4fb7e48e2843 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -304,6 +304,7 @@ static struct platform_device sh_eth_device = { }; static struct r8a66597_platdata sh7724_usb0_host_data = { + .on_chip = 1, }; static struct resource sh7724_usb0_host_resources[] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index c18f7d09281b..f6d208813564 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -40,7 +40,7 @@ static struct platform_device iic_device = { }; static struct r8a66597_platdata r8a66597_data = { - /* This set zero to all members */ + .on_chip = 1, }; static struct resource usb_host_resources[] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c index e1bb80b2a27b..28516499a2c4 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c @@ -398,7 +398,7 @@ static struct platform_device rtc_device = { }; static struct r8a66597_platdata r8a66597_data = { - /* This set zero to all members */ + .on_chip = 1, }; static struct resource sh7723_usb_host_resources[] = { diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 1a920c70b5a1..f21ca7d27a43 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -336,13 +336,6 @@ config USB_R8A66597_HCD To compile this driver as a module, choose M here: the module will be called r8a66597-hcd. -config SUPERH_ON_CHIP_R8A66597 - boolean "Enable SuperH on-chip R8A66597 USB" - depends on USB_R8A66597_HCD && (CPU_SUBTYPE_SH7366 || CPU_SUBTYPE_SH7723 || CPU_SUBTYPE_SH7724) - help - This driver enables support for the on-chip R8A66597 in the - SH7366, SH7723 and SH7724 processors. 
- config USB_WHCI_HCD tristate "Wireless USB Host Controller Interface (WHCI) driver (EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 09895a97c10b..82dce3e0d4d7 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -91,43 +91,43 @@ static int r8a66597_clock_enable(struct r8a66597 *r8a66597) u16 tmp; int i = 0; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#if defined(CONFIG_HAVE_CLK) - clk_enable(r8a66597->clk); + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + clk_enable(r8a66597->clk); #endif - do { - r8a66597_write(r8a66597, SCKE, SYSCFG0); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 1000) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & SCKE) != SCKE); - r8a66597_write(r8a66597, 0x04, 0x02); -#else - do { - r8a66597_write(r8a66597, USBE, SYSCFG0); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 1000) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & USBE) != USBE); - r8a66597_bclr(r8a66597, USBE, SYSCFG0); - r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), XTAL, - SYSCFG0); + do { + r8a66597_write(r8a66597, SCKE, SYSCFG0); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 1000) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & SCKE) != SCKE); + r8a66597_write(r8a66597, 0x04, 0x02); + } else { + do { + r8a66597_write(r8a66597, USBE, SYSCFG0); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 1000) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & USBE) != USBE); + r8a66597_bclr(r8a66597, USBE, SYSCFG0); + r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), + XTAL, SYSCFG0); - i = 0; - r8a66597_bset(r8a66597, XCKE, SYSCFG0); - do { - msleep(1); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 500) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & SCKE) != SCKE); -#endif /* #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) */ + i = 0; + r8a66597_bset(r8a66597, XCKE, SYSCFG0); + do { + msleep(1); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 500) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & SCKE) != SCKE); + } return 0; } @@ -136,15 +136,16 @@ static void r8a66597_clock_disable(struct r8a66597 *r8a66597) { r8a66597_bclr(r8a66597, SCKE, SYSCFG0); udelay(1); -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#if defined(CONFIG_HAVE_CLK) - clk_disable(r8a66597->clk); -#endif -#else - r8a66597_bclr(r8a66597, PLLC, SYSCFG0); - r8a66597_bclr(r8a66597, XCKE, SYSCFG0); - r8a66597_bclr(r8a66597, USBE, SYSCFG0); + + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + clk_disable(r8a66597->clk); #endif + } else { + r8a66597_bclr(r8a66597, PLLC, SYSCFG0); + r8a66597_bclr(r8a66597, XCKE, SYSCFG0); + r8a66597_bclr(r8a66597, USBE, SYSCFG0); + } } static void r8a66597_enable_port(struct r8a66597 *r8a66597, int port) @@ -205,7 +206,7 @@ static int enable_controller(struct r8a66597 *r8a66597) r8a66597_bset(r8a66597, SIGNE | SACKE, INTENB1); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_enable_port(r8a66597, port); return 0; @@ -218,7 +219,7 @@ static void disable_controller(struct r8a66597 *r8a66597) r8a66597_write(r8a66597, 0, INTENB0); r8a66597_write(r8a66597, 0, INTSTS0); - for (port = 0; port 
< R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_disable_port(r8a66597, port); r8a66597_clock_disable(r8a66597); @@ -249,11 +250,12 @@ static int is_hub_limit(char *devpath) return ((strlen(devpath) >= 4) ? 1 : 0); } -static void get_port_number(char *devpath, u16 *root_port, u16 *hub_port) +static void get_port_number(struct r8a66597 *r8a66597, + char *devpath, u16 *root_port, u16 *hub_port) { if (root_port) { *root_port = (devpath[0] & 0x0F) - 1; - if (*root_port >= R8A66597_MAX_ROOT_HUB) + if (*root_port >= r8a66597->max_root_hub) printk(KERN_ERR "r8a66597: Illegal root port number.\n"); } if (hub_port) @@ -355,7 +357,8 @@ static int make_r8a66597_device(struct r8a66597 *r8a66597, INIT_LIST_HEAD(&dev->device_list); list_add_tail(&dev->device_list, &r8a66597->child_device); - get_port_number(urb->dev->devpath, &dev->root_port, &dev->hub_port); + get_port_number(r8a66597, urb->dev->devpath, + &dev->root_port, &dev->hub_port); if (!is_child_device(urb->dev->devpath)) r8a66597->root_hub[dev->root_port].dev = dev; @@ -420,7 +423,7 @@ static void free_usb_address(struct r8a66597 *r8a66597, list_del(&dev->device_list); kfree(dev); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { if (r8a66597->root_hub[port].dev == dev) { r8a66597->root_hub[port].dev = NULL; break; @@ -495,10 +498,20 @@ static void r8a66597_pipe_toggle(struct r8a66597 *r8a66597, r8a66597_bset(r8a66597, SQCLR, pipe->pipectr); } +static inline unsigned short mbw_value(struct r8a66597 *r8a66597) +{ + if (r8a66597->pdata->on_chip) + return MBW_32; + else + return MBW_16; +} + /* this function must be called with interrupt disabled */ static inline void cfifo_change(struct r8a66597 *r8a66597, u16 pipenum) { - r8a66597_mdfy(r8a66597, MBW | pipenum, MBW | CURPIPE, CFIFOSEL); + unsigned short mbw = mbw_value(r8a66597); + + r8a66597_mdfy(r8a66597, mbw | pipenum, mbw | CURPIPE, CFIFOSEL); r8a66597_reg_wait(r8a66597, CFIFOSEL, CURPIPE, pipenum); } @@ -506,11 +519,13 @@ static inline void cfifo_change(struct r8a66597 *r8a66597, u16 pipenum) static inline void fifo_change_from_pipe(struct r8a66597 *r8a66597, struct r8a66597_pipe *pipe) { + unsigned short mbw = mbw_value(r8a66597); + cfifo_change(r8a66597, 0); - r8a66597_mdfy(r8a66597, MBW | 0, MBW | CURPIPE, D0FIFOSEL); - r8a66597_mdfy(r8a66597, MBW | 0, MBW | CURPIPE, D1FIFOSEL); + r8a66597_mdfy(r8a66597, mbw | 0, mbw | CURPIPE, D0FIFOSEL); + r8a66597_mdfy(r8a66597, mbw | 0, mbw | CURPIPE, D1FIFOSEL); - r8a66597_mdfy(r8a66597, MBW | pipe->info.pipenum, MBW | CURPIPE, + r8a66597_mdfy(r8a66597, mbw | pipe->info.pipenum, mbw | CURPIPE, pipe->fifosel); r8a66597_reg_wait(r8a66597, pipe->fifosel, CURPIPE, pipe->info.pipenum); } @@ -742,9 +757,13 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, struct r8a66597_pipe *pipe, struct urb *urb) { -#if !defined(CONFIG_SUPERH_ON_CHIP_R8A66597) int i; struct r8a66597_pipe_info *info = &pipe->info; + unsigned short mbw = mbw_value(r8a66597); + + /* pipe dma is only for external controlles */ + if (r8a66597->pdata->on_chip) + return; if ((pipe->info.pipenum != 0) && (info->type != R8A66597_INT)) { for (i = 0; i < R8A66597_MAX_DMA_CHANNEL; i++) { @@ -763,8 +782,8 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, set_pipe_reg_addr(pipe, i); cfifo_change(r8a66597, 0); - r8a66597_mdfy(r8a66597, MBW | pipe->info.pipenum, - MBW | CURPIPE, pipe->fifosel); + r8a66597_mdfy(r8a66597, mbw | pipe->info.pipenum, + mbw | 
CURPIPE, pipe->fifosel); r8a66597_reg_wait(r8a66597, pipe->fifosel, CURPIPE, pipe->info.pipenum); @@ -772,7 +791,6 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, break; } } -#endif /* #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) */ } /* this function must be called with interrupt disabled */ @@ -1769,7 +1787,7 @@ static void r8a66597_timer(unsigned long _r8a66597) spin_lock_irqsave(&r8a66597->lock, flags); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_root_hub_control(r8a66597, port); spin_unlock_irqrestore(&r8a66597->lock, flags); @@ -1807,7 +1825,7 @@ static void set_address_zero(struct r8a66597 *r8a66597, struct urb *urb) u16 root_port, hub_port; if (usb_address == 0) { - get_port_number(urb->dev->devpath, + get_port_number(r8a66597, urb->dev->devpath, &root_port, &hub_port); set_devadd_reg(r8a66597, 0, get_r8a66597_usb_speed(urb->dev->speed), @@ -2082,7 +2100,7 @@ static int r8a66597_hub_status_data(struct usb_hcd *hcd, char *buf) *buf = 0; /* initialize (no change) */ - for (i = 0; i < R8A66597_MAX_ROOT_HUB; i++) { + for (i = 0; i < r8a66597->max_root_hub; i++) { if (r8a66597->root_hub[i].port & 0xffff0000) *buf |= 1 << (i + 1); } @@ -2097,11 +2115,11 @@ static void r8a66597_hub_descriptor(struct r8a66597 *r8a66597, { desc->bDescriptorType = 0x29; desc->bHubContrCurrent = 0; - desc->bNbrPorts = R8A66597_MAX_ROOT_HUB; + desc->bNbrPorts = r8a66597->max_root_hub; desc->bDescLength = 9; desc->bPwrOn2PwrGood = 0; desc->wHubCharacteristics = cpu_to_le16(0x0011); - desc->bitmap[0] = ((1 << R8A66597_MAX_ROOT_HUB) - 1) << 1; + desc->bitmap[0] = ((1 << r8a66597->max_root_hub) - 1) << 1; desc->bitmap[1] = ~0; } @@ -2129,7 +2147,7 @@ static int r8a66597_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } break; case ClearPortFeature: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; if (wLength != 0) goto error; @@ -2162,12 +2180,12 @@ static int r8a66597_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, *buf = 0x00; break; case GetPortStatus: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; *(__le32 *)buf = cpu_to_le32(rh->port); break; case SetPortFeature: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; if (wLength != 0) goto error; @@ -2216,7 +2234,7 @@ static int r8a66597_bus_suspend(struct usb_hcd *hcd) dbg("%s", __func__); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; unsigned long dvstctr_reg = get_dvstctr_reg(port); @@ -2247,7 +2265,7 @@ static int r8a66597_bus_resume(struct usb_hcd *hcd) dbg("%s", __func__); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; unsigned long dvstctr_reg = get_dvstctr_reg(port); @@ -2314,7 +2332,7 @@ static int r8a66597_suspend(struct device *dev) disable_controller(r8a66597); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; rh->port = 0x00000000; @@ -2354,8 +2372,9 @@ static int __init_or_module r8a66597_remove(struct platform_device *pdev) del_timer_sync(&r8a66597->rh_timer); usb_remove_hcd(hcd); iounmap((void *)r8a66597->reg); -#if 
defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - clk_put(r8a66597->clk); +#ifdef CONFIG_HAVE_CLK + if (r8a66597->pdata->on_chip) + clk_put(r8a66597->clk); #endif usb_put_hcd(hcd); return 0; @@ -2363,7 +2382,7 @@ static int __init_or_module r8a66597_remove(struct platform_device *pdev) static int __devinit r8a66597_probe(struct platform_device *pdev) { -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK char clk_name[8]; #endif struct resource *res = NULL, *ires; @@ -2425,15 +2444,20 @@ static int __devinit r8a66597_probe(struct platform_device *pdev) r8a66597->pdata = pdev->dev.platform_data; r8a66597->irq_sense_low = irq_trigger == IRQF_TRIGGER_LOW; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id); - r8a66597->clk = clk_get(&pdev->dev, clk_name); - if (IS_ERR(r8a66597->clk)) { - dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); - ret = PTR_ERR(r8a66597->clk); - goto clean_up2; - } + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id); + r8a66597->clk = clk_get(&pdev->dev, clk_name); + if (IS_ERR(r8a66597->clk)) { + dev_err(&pdev->dev, "cannot get clock \"%s\"\n", + clk_name); + ret = PTR_ERR(r8a66597->clk); + goto clean_up2; + } #endif + r8a66597->max_root_hub = 1; + } else + r8a66597->max_root_hub = 2; spin_lock_init(&r8a66597->lock); init_timer(&r8a66597->rh_timer); @@ -2463,8 +2487,9 @@ static int __devinit r8a66597_probe(struct platform_device *pdev) return 0; clean_up3: -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - clk_put(r8a66597->clk); +#ifdef CONFIG_HAVE_CLK + if (r8a66597->pdata->on_chip) + clk_put(r8a66597->clk); clean_up2: #endif usb_put_hcd(hcd); diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h index d72680b433f9..eecbd917bc81 100644 --- a/drivers/usb/host/r8a66597.h +++ b/drivers/usb/host/r8a66597.h @@ -26,7 +26,7 @@ #ifndef __R8A66597_H__ #define __R8A66597_H__ -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK #include #endif @@ -193,13 +193,9 @@ #define REW 0x4000 /* b14: Buffer rewind */ #define DCLRM 0x2000 /* b13: DMA buffer clear mode */ #define DREQE 0x1000 /* b12: DREQ output enable */ -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#define MBW 0x0800 -#else -#define MBW 0x0400 /* b10: Maximum bit width for FIFO access */ -#endif #define MBW_8 0x0000 /* 8bit */ #define MBW_16 0x0400 /* 16bit */ +#define MBW_32 0x0800 /* 32bit */ #define BIGEND 0x0100 /* b8: Big endian mode */ #define BYTE_LITTLE 0x0000 /* little dendian */ #define BYTE_BIG 0x0100 /* big endifan */ @@ -405,11 +401,7 @@ #define R8A66597_MAX_NUM_PIPE 10 #define R8A66597_BUF_BSIZE 8 #define R8A66597_MAX_DEVICE 10 -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#define R8A66597_MAX_ROOT_HUB 1 -#else #define R8A66597_MAX_ROOT_HUB 2 -#endif #define R8A66597_MAX_SAMPLING 5 #define R8A66597_RH_POLL_TIME 10 #define R8A66597_MAX_DMA_CHANNEL 2 @@ -487,7 +479,7 @@ struct r8a66597_root_hub { struct r8a66597 { spinlock_t lock; unsigned long reg; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK struct clk *clk; #endif struct r8a66597_platdata *pdata; @@ -504,6 +496,7 @@ struct r8a66597 { unsigned short interval_map; unsigned char pipe_cnt[R8A66597_MAX_NUM_PIPE]; unsigned char dma_map; + unsigned int max_root_hub; struct list_head child_device; unsigned long 
child_connect_map[4]; @@ -550,21 +543,22 @@ static inline void r8a66597_read_fifo(struct r8a66597 *r8a66597, unsigned long offset, u16 *buf, int len) { -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) unsigned long fifoaddr = r8a66597->reg + offset; unsigned long count; - count = len / 4; - insl(fifoaddr, buf, count); + if (r8a66597->pdata->on_chip) { + count = len / 4; + insl(fifoaddr, buf, count); - if (len & 0x00000003) { - unsigned long tmp = inl(fifoaddr); - memcpy((unsigned char *)buf + count * 4, &tmp, len & 0x03); + if (len & 0x00000003) { + unsigned long tmp = inl(fifoaddr); + memcpy((unsigned char *)buf + count * 4, &tmp, + len & 0x03); + } + } else { + len = (len + 1) / 2; + insw(fifoaddr, buf, len); } -#else - len = (len + 1) / 2; - insw(r8a66597->reg + offset, buf, len); -#endif } static inline void r8a66597_write(struct r8a66597 *r8a66597, u16 val, @@ -578,33 +572,33 @@ static inline void r8a66597_write_fifo(struct r8a66597 *r8a66597, int len) { unsigned long fifoaddr = r8a66597->reg + offset; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) unsigned long count; unsigned char *pb; int i; - count = len / 4; - outsl(fifoaddr, buf, count); + if (r8a66597->pdata->on_chip) { + count = len / 4; + outsl(fifoaddr, buf, count); + + if (len & 0x00000003) { + pb = (unsigned char *)buf + count * 4; + for (i = 0; i < (len & 0x00000003); i++) { + if (r8a66597_read(r8a66597, CFIFOSEL) & BIGEND) + outb(pb[i], fifoaddr + i); + else + outb(pb[i], fifoaddr + 3 - i); + } + } + } else { + int odd = len & 0x0001; - if (len & 0x00000003) { - pb = (unsigned char *)buf + count * 4; - for (i = 0; i < (len & 0x00000003); i++) { - if (r8a66597_read(r8a66597, CFIFOSEL) & BIGEND) - outb(pb[i], fifoaddr + i); - else - outb(pb[i], fifoaddr + 3 - i); + len = len / 2; + outsw(fifoaddr, buf, len); + if (unlikely(odd)) { + buf = &buf[len]; + outb((unsigned char)*buf, fifoaddr); } } -#else - int odd = len & 0x0001; - - len = len / 2; - outsw(fifoaddr, buf, len); - if (unlikely(odd)) { - buf = &buf[len]; - outb((unsigned char)*buf, fifoaddr); - } -#endif } static inline void r8a66597_mdfy(struct r8a66597 *r8a66597, diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index e9f0384fa20c..460ee3f6a2c6 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -31,6 +31,9 @@ struct r8a66597_platdata { /* This ops can controll port power instead of DVSTCTR register. */ void (*port_power)(int port, int power); + /* set one = on chip controller, set zero = external controller */ + unsigned on_chip:1; + /* (external controller only) set R8A66597_PLATDATA_XTAL_nnMHZ */ unsigned xtal:2; -- cgit v1.2.3 From fbd90375d7531927d312766b548376d909811b4d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 13:40:14 +0200 Subject: hrtimer: Remove cb_entry from struct hrtimer It's unused, remove it. 
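[Editor's note: cb_entry was private to the hrtimer core (a leftover of the removed callback-list machinery), so dropping it shrinks struct hrtimer without affecting users. As a reminder of the public surface, a minimal, illustrative hrtimer user of this era looks like:]

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;	/* hypothetical example timer */

static enum hrtimer_restart demo_timer_fn(struct hrtimer *timer)
{
	/* expiry work goes here; return HRTIMER_RESTART to re-arm */
	return HRTIMER_NORESTART;
}

static void demo_timer_arm(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;
	/* fire once, 100ms from now */
	hrtimer_start(&demo_timer, ktime_set(0, 100 * NSEC_PER_MSEC),
		      HRTIMER_MODE_REL);
}

Clients only assign .function and go through the init/start interface, which is why removing a private list head is invisible to them.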
Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner LKML-Reference: --- include/linux/hrtimer.h | 2 -- kernel/hrtimer.c | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 54648e625efd..40e7d54fc424 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -91,7 +91,6 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) - * @cb_entry: list head to enqueue an expired timer into the callback list * @start_site: timer statistics field to store the site where the timer * was started * @start_comm: timer statistics field to store the name of the process which @@ -108,7 +107,6 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; - struct list_head cb_entry; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 43d151f185b6..052a0f53e4eb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1092,7 +1092,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; - INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS -- cgit v1.2.3 From cf4f1e76c49dacfde0680b170b9a9b6a42f296bb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Jul 2009 14:32:03 +0000 Subject: usb: move r8a66597 register defines Move r8a66597 hardware register definitions from the host controller header file to the platform data header file. With this change in place we can easily share register definitions between the host controller driver and a future gadget driver. 
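[Editor's note: an illustrative consequence of the move, not code from the patch. Once the register offsets and bit masks live in <linux/usb/r8a66597.h>, any code that maps the controller can use them directly; the real drivers wrap such accesses in their r8a66597_read()/r8a66597_write() helpers:]

#include <linux/io.h>
#include <linux/usb/r8a66597.h>

/* true if the USB clock is running, using the shared SYSCFG0/SCKE bits */
static bool usb_clock_is_running(void __iomem *base)
{
	return (ioread16(base + SYSCFG0) & SCKE) != 0;
}

A future gadget driver for the same silicon then gets these names by including one header instead of carrying a private copy of the ~370-line register block.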
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/usb/host/r8a66597.h | 366 ------------------------------------------ include/linux/usb/r8a66597.h | 372 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 370 insertions(+), 368 deletions(-) (limited to 'include') diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h index eecbd917bc81..228e3fb23854 100644 --- a/drivers/usb/host/r8a66597.h +++ b/drivers/usb/host/r8a66597.h @@ -32,372 +32,6 @@ #include -#define SYSCFG0 0x00 -#define SYSCFG1 0x02 -#define SYSSTS0 0x04 -#define SYSSTS1 0x06 -#define DVSTCTR0 0x08 -#define DVSTCTR1 0x0A -#define TESTMODE 0x0C -#define PINCFG 0x0E -#define DMA0CFG 0x10 -#define DMA1CFG 0x12 -#define CFIFO 0x14 -#define D0FIFO 0x18 -#define D1FIFO 0x1C -#define CFIFOSEL 0x20 -#define CFIFOCTR 0x22 -#define CFIFOSIE 0x24 -#define D0FIFOSEL 0x28 -#define D0FIFOCTR 0x2A -#define D1FIFOSEL 0x2C -#define D1FIFOCTR 0x2E -#define INTENB0 0x30 -#define INTENB1 0x32 -#define INTENB2 0x34 -#define BRDYENB 0x36 -#define NRDYENB 0x38 -#define BEMPENB 0x3A -#define SOFCFG 0x3C -#define INTSTS0 0x40 -#define INTSTS1 0x42 -#define INTSTS2 0x44 -#define BRDYSTS 0x46 -#define NRDYSTS 0x48 -#define BEMPSTS 0x4A -#define FRMNUM 0x4C -#define UFRMNUM 0x4E -#define USBADDR 0x50 -#define USBREQ 0x54 -#define USBVAL 0x56 -#define USBINDX 0x58 -#define USBLENG 0x5A -#define DCPCFG 0x5C -#define DCPMAXP 0x5E -#define DCPCTR 0x60 -#define PIPESEL 0x64 -#define PIPECFG 0x68 -#define PIPEBUF 0x6A -#define PIPEMAXP 0x6C -#define PIPEPERI 0x6E -#define PIPE1CTR 0x70 -#define PIPE2CTR 0x72 -#define PIPE3CTR 0x74 -#define PIPE4CTR 0x76 -#define PIPE5CTR 0x78 -#define PIPE6CTR 0x7A -#define PIPE7CTR 0x7C -#define PIPE8CTR 0x7E -#define PIPE9CTR 0x80 -#define PIPE1TRE 0x90 -#define PIPE1TRN 0x92 -#define PIPE2TRE 0x94 -#define PIPE2TRN 0x96 -#define PIPE3TRE 0x98 -#define PIPE3TRN 0x9A -#define PIPE4TRE 0x9C -#define PIPE4TRN 0x9E -#define PIPE5TRE 0xA0 -#define PIPE5TRN 0xA2 -#define DEVADD0 0xD0 -#define DEVADD1 0xD2 -#define DEVADD2 0xD4 -#define DEVADD3 0xD6 -#define DEVADD4 0xD8 -#define DEVADD5 0xDA -#define DEVADD6 0xDC -#define DEVADD7 0xDE -#define DEVADD8 0xE0 -#define DEVADD9 0xE2 -#define DEVADDA 0xE4 - -/* System Configuration Control Register */ -#define XTAL 0xC000 /* b15-14: Crystal selection */ -#define XTAL48 0x8000 /* 48MHz */ -#define XTAL24 0x4000 /* 24MHz */ -#define XTAL12 0x0000 /* 12MHz */ -#define XCKE 0x2000 /* b13: External clock enable */ -#define PLLC 0x0800 /* b11: PLL control */ -#define SCKE 0x0400 /* b10: USB clock enable */ -#define PCSDIS 0x0200 /* b9: not CS wakeup */ -#define LPSME 0x0100 /* b8: Low power sleep mode */ -#define HSE 0x0080 /* b7: Hi-speed enable */ -#define DCFM 0x0040 /* b6: Controller function select */ -#define DRPD 0x0020 /* b5: D+/- pull down control */ -#define DPRPU 0x0010 /* b4: D+ pull up control */ -#define USBE 0x0001 /* b0: USB module operation enable */ - -/* System Configuration Status Register */ -#define OVCBIT 0x8000 /* b15-14: Over-current bit */ -#define OVCMON 0xC000 /* b15-14: Over-current monitor */ -#define SOFEA 0x0020 /* b5: SOF monitor */ -#define IDMON 0x0004 /* b3: ID-pin monitor */ -#define LNST 0x0003 /* b1-0: D+, D- line status */ -#define SE1 0x0003 /* SE1 */ -#define FS_KSTS 0x0002 /* Full-Speed K State */ -#define FS_JSTS 0x0001 /* Full-Speed J State */ -#define LS_JSTS 0x0002 /* Low-Speed J State */ -#define LS_KSTS 0x0001 /* Low-Speed K State */ -#define SE0 0x0000 /* SE0 */ - -/* Device State Control Register */ 
-#define EXTLP0 0x0400 /* b10: External port */ -#define VBOUT 0x0200 /* b9: VBUS output */ -#define WKUP 0x0100 /* b8: Remote wakeup */ -#define RWUPE 0x0080 /* b7: Remote wakeup sense */ -#define USBRST 0x0040 /* b6: USB reset enable */ -#define RESUME 0x0020 /* b5: Resume enable */ -#define UACT 0x0010 /* b4: USB bus enable */ -#define RHST 0x0007 /* b1-0: Reset handshake status */ -#define HSPROC 0x0004 /* HS handshake is processing */ -#define HSMODE 0x0003 /* Hi-Speed mode */ -#define FSMODE 0x0002 /* Full-Speed mode */ -#define LSMODE 0x0001 /* Low-Speed mode */ -#define UNDECID 0x0000 /* Undecided */ - -/* Test Mode Register */ -#define UTST 0x000F /* b3-0: Test select */ -#define H_TST_PACKET 0x000C /* HOST TEST Packet */ -#define H_TST_SE0_NAK 0x000B /* HOST TEST SE0 NAK */ -#define H_TST_K 0x000A /* HOST TEST K */ -#define H_TST_J 0x0009 /* HOST TEST J */ -#define H_TST_NORMAL 0x0000 /* HOST Normal Mode */ -#define P_TST_PACKET 0x0004 /* PERI TEST Packet */ -#define P_TST_SE0_NAK 0x0003 /* PERI TEST SE0 NAK */ -#define P_TST_K 0x0002 /* PERI TEST K */ -#define P_TST_J 0x0001 /* PERI TEST J */ -#define P_TST_NORMAL 0x0000 /* PERI Normal Mode */ - -/* Data Pin Configuration Register */ -#define LDRV 0x8000 /* b15: Drive Current Adjust */ -#define VIF1 0x0000 /* VIF = 1.8V */ -#define VIF3 0x8000 /* VIF = 3.3V */ -#define INTA 0x0001 /* b1: USB INT-pin active */ - -/* DMAx Pin Configuration Register */ -#define DREQA 0x4000 /* b14: Dreq active select */ -#define BURST 0x2000 /* b13: Burst mode */ -#define DACKA 0x0400 /* b10: Dack active select */ -#define DFORM 0x0380 /* b9-7: DMA mode select */ -#define CPU_ADR_RD_WR 0x0000 /* Address + RD/WR mode (CPU bus) */ -#define CPU_DACK_RD_WR 0x0100 /* DACK + RD/WR mode (CPU bus) */ -#define CPU_DACK_ONLY 0x0180 /* DACK only mode (CPU bus) */ -#define SPLIT_DACK_ONLY 0x0200 /* DACK only mode (SPLIT bus) */ -#define DENDA 0x0040 /* b6: Dend active select */ -#define PKTM 0x0020 /* b5: Packet mode */ -#define DENDE 0x0010 /* b4: Dend enable */ -#define OBUS 0x0004 /* b2: OUTbus mode */ - -/* CFIFO/DxFIFO Port Select Register */ -#define RCNT 0x8000 /* b15: Read count mode */ -#define REW 0x4000 /* b14: Buffer rewind */ -#define DCLRM 0x2000 /* b13: DMA buffer clear mode */ -#define DREQE 0x1000 /* b12: DREQ output enable */ -#define MBW_8 0x0000 /* 8bit */ -#define MBW_16 0x0400 /* 16bit */ -#define MBW_32 0x0800 /* 32bit */ -#define BIGEND 0x0100 /* b8: Big endian mode */ -#define BYTE_LITTLE 0x0000 /* little dendian */ -#define BYTE_BIG 0x0100 /* big endifan */ -#define ISEL 0x0020 /* b5: DCP FIFO port direction select */ -#define CURPIPE 0x000F /* b2-0: PIPE select */ - -/* CFIFO/DxFIFO Port Control Register */ -#define BVAL 0x8000 /* b15: Buffer valid flag */ -#define BCLR 0x4000 /* b14: Buffer clear */ -#define FRDY 0x2000 /* b13: FIFO ready */ -#define DTLN 0x0FFF /* b11-0: FIFO received data length */ - -/* Interrupt Enable Register 0 */ -#define VBSE 0x8000 /* b15: VBUS interrupt */ -#define RSME 0x4000 /* b14: Resume interrupt */ -#define SOFE 0x2000 /* b13: Frame update interrupt */ -#define DVSE 0x1000 /* b12: Device state transition interrupt */ -#define CTRE 0x0800 /* b11: Control transfer stage transition interrupt */ -#define BEMPE 0x0400 /* b10: Buffer empty interrupt */ -#define NRDYE 0x0200 /* b9: Buffer not ready interrupt */ -#define BRDYE 0x0100 /* b8: Buffer ready interrupt */ - -/* Interrupt Enable Register 1 */ -#define OVRCRE 0x8000 /* b15: Over-current interrupt */ -#define BCHGE 0x4000 /* b14: USB us chenge 
interrupt */ -#define DTCHE 0x1000 /* b12: Detach sense interrupt */ -#define ATTCHE 0x0800 /* b11: Attach sense interrupt */ -#define EOFERRE 0x0040 /* b6: EOF error interrupt */ -#define SIGNE 0x0020 /* b5: SETUP IGNORE interrupt */ -#define SACKE 0x0010 /* b4: SETUP ACK interrupt */ - -/* BRDY Interrupt Enable/Status Register */ -#define BRDY9 0x0200 /* b9: PIPE9 */ -#define BRDY8 0x0100 /* b8: PIPE8 */ -#define BRDY7 0x0080 /* b7: PIPE7 */ -#define BRDY6 0x0040 /* b6: PIPE6 */ -#define BRDY5 0x0020 /* b5: PIPE5 */ -#define BRDY4 0x0010 /* b4: PIPE4 */ -#define BRDY3 0x0008 /* b3: PIPE3 */ -#define BRDY2 0x0004 /* b2: PIPE2 */ -#define BRDY1 0x0002 /* b1: PIPE1 */ -#define BRDY0 0x0001 /* b1: PIPE0 */ - -/* NRDY Interrupt Enable/Status Register */ -#define NRDY9 0x0200 /* b9: PIPE9 */ -#define NRDY8 0x0100 /* b8: PIPE8 */ -#define NRDY7 0x0080 /* b7: PIPE7 */ -#define NRDY6 0x0040 /* b6: PIPE6 */ -#define NRDY5 0x0020 /* b5: PIPE5 */ -#define NRDY4 0x0010 /* b4: PIPE4 */ -#define NRDY3 0x0008 /* b3: PIPE3 */ -#define NRDY2 0x0004 /* b2: PIPE2 */ -#define NRDY1 0x0002 /* b1: PIPE1 */ -#define NRDY0 0x0001 /* b1: PIPE0 */ - -/* BEMP Interrupt Enable/Status Register */ -#define BEMP9 0x0200 /* b9: PIPE9 */ -#define BEMP8 0x0100 /* b8: PIPE8 */ -#define BEMP7 0x0080 /* b7: PIPE7 */ -#define BEMP6 0x0040 /* b6: PIPE6 */ -#define BEMP5 0x0020 /* b5: PIPE5 */ -#define BEMP4 0x0010 /* b4: PIPE4 */ -#define BEMP3 0x0008 /* b3: PIPE3 */ -#define BEMP2 0x0004 /* b2: PIPE2 */ -#define BEMP1 0x0002 /* b1: PIPE1 */ -#define BEMP0 0x0001 /* b0: PIPE0 */ - -/* SOF Pin Configuration Register */ -#define TRNENSEL 0x0100 /* b8: Select transaction enable period */ -#define BRDYM 0x0040 /* b6: BRDY clear timing */ -#define INTL 0x0020 /* b5: Interrupt sense select */ -#define EDGESTS 0x0010 /* b4: */ -#define SOFMODE 0x000C /* b3-2: SOF pin select */ -#define SOF_125US 0x0008 /* SOF OUT 125us Frame Signal */ -#define SOF_1MS 0x0004 /* SOF OUT 1ms Frame Signal */ -#define SOF_DISABLE 0x0000 /* SOF OUT Disable */ - -/* Interrupt Status Register 0 */ -#define VBINT 0x8000 /* b15: VBUS interrupt */ -#define RESM 0x4000 /* b14: Resume interrupt */ -#define SOFR 0x2000 /* b13: SOF frame update interrupt */ -#define DVST 0x1000 /* b12: Device state transition interrupt */ -#define CTRT 0x0800 /* b11: Control transfer stage transition interrupt */ -#define BEMP 0x0400 /* b10: Buffer empty interrupt */ -#define NRDY 0x0200 /* b9: Buffer not ready interrupt */ -#define BRDY 0x0100 /* b8: Buffer ready interrupt */ -#define VBSTS 0x0080 /* b7: VBUS input port */ -#define DVSQ 0x0070 /* b6-4: Device state */ -#define DS_SPD_CNFG 0x0070 /* Suspend Configured */ -#define DS_SPD_ADDR 0x0060 /* Suspend Address */ -#define DS_SPD_DFLT 0x0050 /* Suspend Default */ -#define DS_SPD_POWR 0x0040 /* Suspend Powered */ -#define DS_SUSP 0x0040 /* Suspend */ -#define DS_CNFG 0x0030 /* Configured */ -#define DS_ADDS 0x0020 /* Address */ -#define DS_DFLT 0x0010 /* Default */ -#define DS_POWR 0x0000 /* Powered */ -#define DVSQS 0x0030 /* b5-4: Device state */ -#define VALID 0x0008 /* b3: Setup packet detected flag */ -#define CTSQ 0x0007 /* b2-0: Control transfer stage */ -#define CS_SQER 0x0006 /* Sequence error */ -#define CS_WRND 0x0005 /* Control write nodata status stage */ -#define CS_WRSS 0x0004 /* Control write status stage */ -#define CS_WRDS 0x0003 /* Control write data stage */ -#define CS_RDSS 0x0002 /* Control read status stage */ -#define CS_RDDS 0x0001 /* Control read data stage */ -#define CS_IDST 0x0000 /* Idle or setup 
stage */ - -/* Interrupt Status Register 1 */ -#define OVRCR 0x8000 /* b15: Over-current interrupt */ -#define BCHG 0x4000 /* b14: USB bus chenge interrupt */ -#define DTCH 0x1000 /* b12: Detach sense interrupt */ -#define ATTCH 0x0800 /* b11: Attach sense interrupt */ -#define EOFERR 0x0040 /* b6: EOF-error interrupt */ -#define SIGN 0x0020 /* b5: Setup ignore interrupt */ -#define SACK 0x0010 /* b4: Setup acknowledge interrupt */ - -/* Frame Number Register */ -#define OVRN 0x8000 /* b15: Overrun error */ -#define CRCE 0x4000 /* b14: Received data error */ -#define FRNM 0x07FF /* b10-0: Frame number */ - -/* Micro Frame Number Register */ -#define UFRNM 0x0007 /* b2-0: Micro frame number */ - -/* Default Control Pipe Maxpacket Size Register */ -/* Pipe Maxpacket Size Register */ -#define DEVSEL 0xF000 /* b15-14: Device address select */ -#define MAXP 0x007F /* b6-0: Maxpacket size of default control pipe */ - -/* Default Control Pipe Control Register */ -#define BSTS 0x8000 /* b15: Buffer status */ -#define SUREQ 0x4000 /* b14: Send USB request */ -#define CSCLR 0x2000 /* b13: complete-split status clear */ -#define CSSTS 0x1000 /* b12: complete-split status */ -#define SUREQCLR 0x0800 /* b11: stop setup request */ -#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ -#define SQSET 0x0080 /* b7: Sequence toggle bit set */ -#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ -#define PBUSY 0x0020 /* b5: pipe busy */ -#define PINGE 0x0010 /* b4: ping enable */ -#define CCPL 0x0004 /* b2: Enable control transfer complete */ -#define PID 0x0003 /* b1-0: Response PID */ -#define PID_STALL11 0x0003 /* STALL */ -#define PID_STALL 0x0002 /* STALL */ -#define PID_BUF 0x0001 /* BUF */ -#define PID_NAK 0x0000 /* NAK */ - -/* Pipe Window Select Register */ -#define PIPENM 0x0007 /* b2-0: Pipe select */ - -/* Pipe Configuration Register */ -#define R8A66597_TYP 0xC000 /* b15-14: Transfer type */ -#define R8A66597_ISO 0xC000 /* Isochronous */ -#define R8A66597_INT 0x8000 /* Interrupt */ -#define R8A66597_BULK 0x4000 /* Bulk */ -#define R8A66597_BFRE 0x0400 /* b10: Buffer ready interrupt mode select */ -#define R8A66597_DBLB 0x0200 /* b9: Double buffer mode select */ -#define R8A66597_CNTMD 0x0100 /* b8: Continuous transfer mode select */ -#define R8A66597_SHTNAK 0x0080 /* b7: Transfer end NAK */ -#define R8A66597_DIR 0x0010 /* b4: Transfer direction select */ -#define R8A66597_EPNUM 0x000F /* b3-0: Eendpoint number select */ - -/* Pipe Buffer Configuration Register */ -#define BUFSIZE 0x7C00 /* b14-10: Pipe buffer size */ -#define BUFNMB 0x007F /* b6-0: Pipe buffer number */ -#define PIPE0BUF 256 -#define PIPExBUF 64 - -/* Pipe Maxpacket Size Register */ -#define MXPS 0x07FF /* b10-0: Maxpacket size */ - -/* Pipe Cycle Configuration Register */ -#define IFIS 0x1000 /* b12: Isochronous in-buffer flush mode select */ -#define IITV 0x0007 /* b2-0: Isochronous interval */ - -/* Pipex Control Register */ -#define BSTS 0x8000 /* b15: Buffer status */ -#define INBUFM 0x4000 /* b14: IN buffer monitor (Only for PIPE1 to 5) */ -#define CSCLR 0x2000 /* b13: complete-split status clear */ -#define CSSTS 0x1000 /* b12: complete-split status */ -#define ATREPM 0x0400 /* b10: Auto repeat mode */ -#define ACLRM 0x0200 /* b9: Out buffer auto clear mode */ -#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ -#define SQSET 0x0080 /* b7: Sequence toggle bit set */ -#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ -#define PBUSY 0x0020 /* b5: pipe busy */ -#define PID 0x0003 /* b1-0: Response 
PID */ - -/* PIPExTRE */ -#define TRENB 0x0200 /* b9: Transaction counter enable */ -#define TRCLR 0x0100 /* b8: Transaction counter clear */ - -/* PIPExTRN */ -#define TRNCNT 0xFFFF /* b15-0: Transaction counter */ - -/* DEVADDx */ -#define UPPHUB 0x7800 -#define HUBPORT 0x0700 -#define USBSPD 0x00C0 -#define RTPORT 0x0001 - #define R8A66597_MAX_NUM_PIPE 10 #define R8A66597_BUF_BSIZE 8 #define R8A66597_MAX_DEVICE 10 diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index 460ee3f6a2c6..26d216734057 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -28,7 +28,7 @@ #define R8A66597_PLATDATA_XTAL_48MHZ 0x03 struct r8a66597_platdata { - /* This ops can controll port power instead of DVSTCTR register. */ + /* This callback can control port power instead of DVSTCTR register. */ void (*port_power)(int port, int power); /* set one = on chip controller, set zero = external controller */ @@ -43,5 +43,373 @@ struct r8a66597_platdata { /* set one = big endian, set zero = little endian */ unsigned endian:1; }; -#endif + +/* Register definitions */ +#define SYSCFG0 0x00 +#define SYSCFG1 0x02 +#define SYSSTS0 0x04 +#define SYSSTS1 0x06 +#define DVSTCTR0 0x08 +#define DVSTCTR1 0x0A +#define TESTMODE 0x0C +#define PINCFG 0x0E +#define DMA0CFG 0x10 +#define DMA1CFG 0x12 +#define CFIFO 0x14 +#define D0FIFO 0x18 +#define D1FIFO 0x1C +#define CFIFOSEL 0x20 +#define CFIFOCTR 0x22 +#define CFIFOSIE 0x24 +#define D0FIFOSEL 0x28 +#define D0FIFOCTR 0x2A +#define D1FIFOSEL 0x2C +#define D1FIFOCTR 0x2E +#define INTENB0 0x30 +#define INTENB1 0x32 +#define INTENB2 0x34 +#define BRDYENB 0x36 +#define NRDYENB 0x38 +#define BEMPENB 0x3A +#define SOFCFG 0x3C +#define INTSTS0 0x40 +#define INTSTS1 0x42 +#define INTSTS2 0x44 +#define BRDYSTS 0x46 +#define NRDYSTS 0x48 +#define BEMPSTS 0x4A +#define FRMNUM 0x4C +#define UFRMNUM 0x4E +#define USBADDR 0x50 +#define USBREQ 0x54 +#define USBVAL 0x56 +#define USBINDX 0x58 +#define USBLENG 0x5A +#define DCPCFG 0x5C +#define DCPMAXP 0x5E +#define DCPCTR 0x60 +#define PIPESEL 0x64 +#define PIPECFG 0x68 +#define PIPEBUF 0x6A +#define PIPEMAXP 0x6C +#define PIPEPERI 0x6E +#define PIPE1CTR 0x70 +#define PIPE2CTR 0x72 +#define PIPE3CTR 0x74 +#define PIPE4CTR 0x76 +#define PIPE5CTR 0x78 +#define PIPE6CTR 0x7A +#define PIPE7CTR 0x7C +#define PIPE8CTR 0x7E +#define PIPE9CTR 0x80 +#define PIPE1TRE 0x90 +#define PIPE1TRN 0x92 +#define PIPE2TRE 0x94 +#define PIPE2TRN 0x96 +#define PIPE3TRE 0x98 +#define PIPE3TRN 0x9A +#define PIPE4TRE 0x9C +#define PIPE4TRN 0x9E +#define PIPE5TRE 0xA0 +#define PIPE5TRN 0xA2 +#define DEVADD0 0xD0 +#define DEVADD1 0xD2 +#define DEVADD2 0xD4 +#define DEVADD3 0xD6 +#define DEVADD4 0xD8 +#define DEVADD5 0xDA +#define DEVADD6 0xDC +#define DEVADD7 0xDE +#define DEVADD8 0xE0 +#define DEVADD9 0xE2 +#define DEVADDA 0xE4 + +/* System Configuration Control Register */ +#define XTAL 0xC000 /* b15-14: Crystal selection */ +#define XTAL48 0x8000 /* 48MHz */ +#define XTAL24 0x4000 /* 24MHz */ +#define XTAL12 0x0000 /* 12MHz */ +#define XCKE 0x2000 /* b13: External clock enable */ +#define PLLC 0x0800 /* b11: PLL control */ +#define SCKE 0x0400 /* b10: USB clock enable */ +#define PCSDIS 0x0200 /* b9: not CS wakeup */ +#define LPSME 0x0100 /* b8: Low power sleep mode */ +#define HSE 0x0080 /* b7: Hi-speed enable */ +#define DCFM 0x0040 /* b6: Controller function select */ +#define DRPD 0x0020 /* b5: D+/- pull down control */ +#define DPRPU 0x0010 /* b4: D+ pull up control */ +#define USBE 0x0001 /* b0: USB module operation 
enable */ + +/* System Configuration Status Register */ +#define OVCBIT 0x8000 /* b15-14: Over-current bit */ +#define OVCMON 0xC000 /* b15-14: Over-current monitor */ +#define SOFEA 0x0020 /* b5: SOF monitor */ +#define IDMON 0x0004 /* b3: ID-pin monitor */ +#define LNST 0x0003 /* b1-0: D+, D- line status */ +#define SE1 0x0003 /* SE1 */ +#define FS_KSTS 0x0002 /* Full-Speed K State */ +#define FS_JSTS 0x0001 /* Full-Speed J State */ +#define LS_JSTS 0x0002 /* Low-Speed J State */ +#define LS_KSTS 0x0001 /* Low-Speed K State */ +#define SE0 0x0000 /* SE0 */ + +/* Device State Control Register */ +#define EXTLP0 0x0400 /* b10: External port */ +#define VBOUT 0x0200 /* b9: VBUS output */ +#define WKUP 0x0100 /* b8: Remote wakeup */ +#define RWUPE 0x0080 /* b7: Remote wakeup sense */ +#define USBRST 0x0040 /* b6: USB reset enable */ +#define RESUME 0x0020 /* b5: Resume enable */ +#define UACT 0x0010 /* b4: USB bus enable */ +#define RHST 0x0007 /* b1-0: Reset handshake status */ +#define HSPROC 0x0004 /* HS handshake is processing */ +#define HSMODE 0x0003 /* Hi-Speed mode */ +#define FSMODE 0x0002 /* Full-Speed mode */ +#define LSMODE 0x0001 /* Low-Speed mode */ +#define UNDECID 0x0000 /* Undecided */ + +/* Test Mode Register */ +#define UTST 0x000F /* b3-0: Test select */ +#define H_TST_PACKET 0x000C /* HOST TEST Packet */ +#define H_TST_SE0_NAK 0x000B /* HOST TEST SE0 NAK */ +#define H_TST_K 0x000A /* HOST TEST K */ +#define H_TST_J 0x0009 /* HOST TEST J */ +#define H_TST_NORMAL 0x0000 /* HOST Normal Mode */ +#define P_TST_PACKET 0x0004 /* PERI TEST Packet */ +#define P_TST_SE0_NAK 0x0003 /* PERI TEST SE0 NAK */ +#define P_TST_K 0x0002 /* PERI TEST K */ +#define P_TST_J 0x0001 /* PERI TEST J */ +#define P_TST_NORMAL 0x0000 /* PERI Normal Mode */ + +/* Data Pin Configuration Register */ +#define LDRV 0x8000 /* b15: Drive Current Adjust */ +#define VIF1 0x0000 /* VIF = 1.8V */ +#define VIF3 0x8000 /* VIF = 3.3V */ +#define INTA 0x0001 /* b1: USB INT-pin active */ + +/* DMAx Pin Configuration Register */ +#define DREQA 0x4000 /* b14: Dreq active select */ +#define BURST 0x2000 /* b13: Burst mode */ +#define DACKA 0x0400 /* b10: Dack active select */ +#define DFORM 0x0380 /* b9-7: DMA mode select */ +#define CPU_ADR_RD_WR 0x0000 /* Address + RD/WR mode (CPU bus) */ +#define CPU_DACK_RD_WR 0x0100 /* DACK + RD/WR mode (CPU bus) */ +#define CPU_DACK_ONLY 0x0180 /* DACK only mode (CPU bus) */ +#define SPLIT_DACK_ONLY 0x0200 /* DACK only mode (SPLIT bus) */ +#define DENDA 0x0040 /* b6: Dend active select */ +#define PKTM 0x0020 /* b5: Packet mode */ +#define DENDE 0x0010 /* b4: Dend enable */ +#define OBUS 0x0004 /* b2: OUTbus mode */ + +/* CFIFO/DxFIFO Port Select Register */ +#define RCNT 0x8000 /* b15: Read count mode */ +#define REW 0x4000 /* b14: Buffer rewind */ +#define DCLRM 0x2000 /* b13: DMA buffer clear mode */ +#define DREQE 0x1000 /* b12: DREQ output enable */ +#define MBW_8 0x0000 /* 8bit */ +#define MBW_16 0x0400 /* 16bit */ +#define MBW_32 0x0800 /* 32bit */ +#define BIGEND 0x0100 /* b8: Big endian mode */ +#define BYTE_LITTLE 0x0000 /* little dendian */ +#define BYTE_BIG 0x0100 /* big endifan */ +#define ISEL 0x0020 /* b5: DCP FIFO port direction select */ +#define CURPIPE 0x000F /* b2-0: PIPE select */ + +/* CFIFO/DxFIFO Port Control Register */ +#define BVAL 0x8000 /* b15: Buffer valid flag */ +#define BCLR 0x4000 /* b14: Buffer clear */ +#define FRDY 0x2000 /* b13: FIFO ready */ +#define DTLN 0x0FFF /* b11-0: FIFO received data length */ + +/* Interrupt Enable Register 0 */ 
+#define VBSE 0x8000 /* b15: VBUS interrupt */ +#define RSME 0x4000 /* b14: Resume interrupt */ +#define SOFE 0x2000 /* b13: Frame update interrupt */ +#define DVSE 0x1000 /* b12: Device state transition interrupt */ +#define CTRE 0x0800 /* b11: Control transfer stage transition interrupt */ +#define BEMPE 0x0400 /* b10: Buffer empty interrupt */ +#define NRDYE 0x0200 /* b9: Buffer not ready interrupt */ +#define BRDYE 0x0100 /* b8: Buffer ready interrupt */ + +/* Interrupt Enable Register 1 */ +#define OVRCRE 0x8000 /* b15: Over-current interrupt */ +#define BCHGE 0x4000 /* b14: USB us chenge interrupt */ +#define DTCHE 0x1000 /* b12: Detach sense interrupt */ +#define ATTCHE 0x0800 /* b11: Attach sense interrupt */ +#define EOFERRE 0x0040 /* b6: EOF error interrupt */ +#define SIGNE 0x0020 /* b5: SETUP IGNORE interrupt */ +#define SACKE 0x0010 /* b4: SETUP ACK interrupt */ + +/* BRDY Interrupt Enable/Status Register */ +#define BRDY9 0x0200 /* b9: PIPE9 */ +#define BRDY8 0x0100 /* b8: PIPE8 */ +#define BRDY7 0x0080 /* b7: PIPE7 */ +#define BRDY6 0x0040 /* b6: PIPE6 */ +#define BRDY5 0x0020 /* b5: PIPE5 */ +#define BRDY4 0x0010 /* b4: PIPE4 */ +#define BRDY3 0x0008 /* b3: PIPE3 */ +#define BRDY2 0x0004 /* b2: PIPE2 */ +#define BRDY1 0x0002 /* b1: PIPE1 */ +#define BRDY0 0x0001 /* b1: PIPE0 */ + +/* NRDY Interrupt Enable/Status Register */ +#define NRDY9 0x0200 /* b9: PIPE9 */ +#define NRDY8 0x0100 /* b8: PIPE8 */ +#define NRDY7 0x0080 /* b7: PIPE7 */ +#define NRDY6 0x0040 /* b6: PIPE6 */ +#define NRDY5 0x0020 /* b5: PIPE5 */ +#define NRDY4 0x0010 /* b4: PIPE4 */ +#define NRDY3 0x0008 /* b3: PIPE3 */ +#define NRDY2 0x0004 /* b2: PIPE2 */ +#define NRDY1 0x0002 /* b1: PIPE1 */ +#define NRDY0 0x0001 /* b1: PIPE0 */ + +/* BEMP Interrupt Enable/Status Register */ +#define BEMP9 0x0200 /* b9: PIPE9 */ +#define BEMP8 0x0100 /* b8: PIPE8 */ +#define BEMP7 0x0080 /* b7: PIPE7 */ +#define BEMP6 0x0040 /* b6: PIPE6 */ +#define BEMP5 0x0020 /* b5: PIPE5 */ +#define BEMP4 0x0010 /* b4: PIPE4 */ +#define BEMP3 0x0008 /* b3: PIPE3 */ +#define BEMP2 0x0004 /* b2: PIPE2 */ +#define BEMP1 0x0002 /* b1: PIPE1 */ +#define BEMP0 0x0001 /* b0: PIPE0 */ + +/* SOF Pin Configuration Register */ +#define TRNENSEL 0x0100 /* b8: Select transaction enable period */ +#define BRDYM 0x0040 /* b6: BRDY clear timing */ +#define INTL 0x0020 /* b5: Interrupt sense select */ +#define EDGESTS 0x0010 /* b4: */ +#define SOFMODE 0x000C /* b3-2: SOF pin select */ +#define SOF_125US 0x0008 /* SOF OUT 125us Frame Signal */ +#define SOF_1MS 0x0004 /* SOF OUT 1ms Frame Signal */ +#define SOF_DISABLE 0x0000 /* SOF OUT Disable */ + +/* Interrupt Status Register 0 */ +#define VBINT 0x8000 /* b15: VBUS interrupt */ +#define RESM 0x4000 /* b14: Resume interrupt */ +#define SOFR 0x2000 /* b13: SOF frame update interrupt */ +#define DVST 0x1000 /* b12: Device state transition interrupt */ +#define CTRT 0x0800 /* b11: Control transfer stage transition interrupt */ +#define BEMP 0x0400 /* b10: Buffer empty interrupt */ +#define NRDY 0x0200 /* b9: Buffer not ready interrupt */ +#define BRDY 0x0100 /* b8: Buffer ready interrupt */ +#define VBSTS 0x0080 /* b7: VBUS input port */ +#define DVSQ 0x0070 /* b6-4: Device state */ +#define DS_SPD_CNFG 0x0070 /* Suspend Configured */ +#define DS_SPD_ADDR 0x0060 /* Suspend Address */ +#define DS_SPD_DFLT 0x0050 /* Suspend Default */ +#define DS_SPD_POWR 0x0040 /* Suspend Powered */ +#define DS_SUSP 0x0040 /* Suspend */ +#define DS_CNFG 0x0030 /* Configured */ +#define DS_ADDS 0x0020 /* Address */ +#define 
DS_DFLT 0x0010 /* Default */ +#define DS_POWR 0x0000 /* Powered */ +#define DVSQS 0x0030 /* b5-4: Device state */ +#define VALID 0x0008 /* b3: Setup packet detected flag */ +#define CTSQ 0x0007 /* b2-0: Control transfer stage */ +#define CS_SQER 0x0006 /* Sequence error */ +#define CS_WRND 0x0005 /* Control write nodata status stage */ +#define CS_WRSS 0x0004 /* Control write status stage */ +#define CS_WRDS 0x0003 /* Control write data stage */ +#define CS_RDSS 0x0002 /* Control read status stage */ +#define CS_RDDS 0x0001 /* Control read data stage */ +#define CS_IDST 0x0000 /* Idle or setup stage */ + +/* Interrupt Status Register 1 */ +#define OVRCR 0x8000 /* b15: Over-current interrupt */ +#define BCHG 0x4000 /* b14: USB bus chenge interrupt */ +#define DTCH 0x1000 /* b12: Detach sense interrupt */ +#define ATTCH 0x0800 /* b11: Attach sense interrupt */ +#define EOFERR 0x0040 /* b6: EOF-error interrupt */ +#define SIGN 0x0020 /* b5: Setup ignore interrupt */ +#define SACK 0x0010 /* b4: Setup acknowledge interrupt */ + +/* Frame Number Register */ +#define OVRN 0x8000 /* b15: Overrun error */ +#define CRCE 0x4000 /* b14: Received data error */ +#define FRNM 0x07FF /* b10-0: Frame number */ + +/* Micro Frame Number Register */ +#define UFRNM 0x0007 /* b2-0: Micro frame number */ + +/* Default Control Pipe Maxpacket Size Register */ +/* Pipe Maxpacket Size Register */ +#define DEVSEL 0xF000 /* b15-14: Device address select */ +#define MAXP 0x007F /* b6-0: Maxpacket size of default control pipe */ + +/* Default Control Pipe Control Register */ +#define BSTS 0x8000 /* b15: Buffer status */ +#define SUREQ 0x4000 /* b14: Send USB request */ +#define CSCLR 0x2000 /* b13: complete-split status clear */ +#define CSSTS 0x1000 /* b12: complete-split status */ +#define SUREQCLR 0x0800 /* b11: stop setup request */ +#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ +#define SQSET 0x0080 /* b7: Sequence toggle bit set */ +#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ +#define PBUSY 0x0020 /* b5: pipe busy */ +#define PINGE 0x0010 /* b4: ping enable */ +#define CCPL 0x0004 /* b2: Enable control transfer complete */ +#define PID 0x0003 /* b1-0: Response PID */ +#define PID_STALL11 0x0003 /* STALL */ +#define PID_STALL 0x0002 /* STALL */ +#define PID_BUF 0x0001 /* BUF */ +#define PID_NAK 0x0000 /* NAK */ + +/* Pipe Window Select Register */ +#define PIPENM 0x0007 /* b2-0: Pipe select */ + +/* Pipe Configuration Register */ +#define R8A66597_TYP 0xC000 /* b15-14: Transfer type */ +#define R8A66597_ISO 0xC000 /* Isochronous */ +#define R8A66597_INT 0x8000 /* Interrupt */ +#define R8A66597_BULK 0x4000 /* Bulk */ +#define R8A66597_BFRE 0x0400 /* b10: Buffer ready interrupt mode select */ +#define R8A66597_DBLB 0x0200 /* b9: Double buffer mode select */ +#define R8A66597_CNTMD 0x0100 /* b8: Continuous transfer mode select */ +#define R8A66597_SHTNAK 0x0080 /* b7: Transfer end NAK */ +#define R8A66597_DIR 0x0010 /* b4: Transfer direction select */ +#define R8A66597_EPNUM 0x000F /* b3-0: Eendpoint number select */ + +/* Pipe Buffer Configuration Register */ +#define BUFSIZE 0x7C00 /* b14-10: Pipe buffer size */ +#define BUFNMB 0x007F /* b6-0: Pipe buffer number */ +#define PIPE0BUF 256 +#define PIPExBUF 64 + +/* Pipe Maxpacket Size Register */ +#define MXPS 0x07FF /* b10-0: Maxpacket size */ + +/* Pipe Cycle Configuration Register */ +#define IFIS 0x1000 /* b12: Isochronous in-buffer flush mode select */ +#define IITV 0x0007 /* b2-0: Isochronous interval */ + +/* Pipex Control Register */ 
+#define BSTS 0x8000 /* b15: Buffer status */ +#define INBUFM 0x4000 /* b14: IN buffer monitor (Only for PIPE1 to 5) */ +#define CSCLR 0x2000 /* b13: complete-split status clear */ +#define CSSTS 0x1000 /* b12: complete-split status */ +#define ATREPM 0x0400 /* b10: Auto repeat mode */ +#define ACLRM 0x0200 /* b9: Out buffer auto clear mode */ +#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ +#define SQSET 0x0080 /* b7: Sequence toggle bit set */ +#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ +#define PBUSY 0x0020 /* b5: pipe busy */ +#define PID 0x0003 /* b1-0: Response PID */ + +/* PIPExTRE */ +#define TRENB 0x0200 /* b9: Transaction counter enable */ +#define TRCLR 0x0100 /* b8: Transaction counter clear */ + +/* PIPExTRN */ +#define TRNCNT 0xFFFF /* b15-0: Transaction counter */ + +/* DEVADDx */ +#define UPPHUB 0x7800 +#define HUBPORT 0x0700 +#define USBSPD 0x00C0 +#define RTPORT 0x0001 + +#endif /* __LINUX_USB_R8A66597_H */ -- cgit v1.2.3 From 2c59b0b70b9d5d61c726f179724660c4c2423f31 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Jul 2009 14:41:35 +0000 Subject: usb: m66592-udc platform data on_chip support Convert the m66592-udc driver to use the on_chip flag from platform data to enable on chip behaviour instead of relying on CONFIG_SUPERH_BUILT_IN_M66592 ugliness. This makes the code cleaner and also allows us to support both external and internal m66592 with the same kernel. It also makes the Kconfig part more future proof since we with this patch can add support for new processors with on-chip m66592 without modifying the Kconfig. The patch adds a m66592 header file for platform data and ties in platform data to the existing m66592 devices. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-highlander/setup.c | 7 + arch/sh/boards/mach-x3proto/setup.c | 7 + arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 8 +- drivers/usb/gadget/Kconfig | 10 -- drivers/usb/gadget/m66592-udc.c | 252 +++++++++++++++++++-------------- drivers/usb/gadget/m66592-udc.h | 89 ++++++------ include/linux/usb/m66592.h | 44 ++++++ 7 files changed, 257 insertions(+), 160 deletions(-) create mode 100644 include/linux/usb/m66592.h (limited to 'include') diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c index 1639f8915000..566e69d8d729 100644 --- a/arch/sh/boards/mach-highlander/setup.c +++ b/arch/sh/boards/mach-highlander/setup.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,11 @@ static struct platform_device r8a66597_usb_host_device = { .resource = r8a66597_usb_host_resources, }; +static struct m66592_platdata usbf_platdata = { + .xtal = M66592_PLATDATA_XTAL_24MHZ, + .vif = 1, +}; + static struct resource m66592_usb_peripheral_resources[] = { [0] = { .name = "m66592_udc", @@ -81,6 +87,7 @@ static struct platform_device m66592_usb_peripheral_device = { .dev = { .dma_mask = NULL, /* don't use dma */ .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(m66592_usb_peripheral_resources), .resource = m66592_usb_peripheral_resources, diff --git a/arch/sh/boards/mach-x3proto/setup.c b/arch/sh/boards/mach-x3proto/setup.c index 8913ae39a802..efe4cb9f8a77 100644 --- a/arch/sh/boards/mach-x3proto/setup.c +++ b/arch/sh/boards/mach-x3proto/setup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static struct resource heartbeat_resources[] = { @@ -89,6 +90,11 @@ static struct platform_device r8a66597_usb_host_device = { 
.resource = r8a66597_usb_host_resources, }; +static struct m66592_platdata usbf_platdata = { + .xtal = M66592_PLATDATA_XTAL_24MHZ, + .vif = 1, +}; + static struct resource m66592_usb_peripheral_resources[] = { [0] = { .name = "m66592_udc", @@ -109,6 +115,7 @@ static struct platform_device m66592_usb_peripheral_device = { .dev = { .dma_mask = NULL, /* don't use dma */ .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(m66592_usb_peripheral_resources), .resource = m66592_usb_peripheral_resources, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index ea524a2da3e4..0bad14a44238 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -47,9 +48,13 @@ static struct platform_device rtc_device = { .resource = rtc_resources, }; +static struct m66592_platdata usbf_platdata = { + .on_chip = 1, +}; + static struct resource usbf_resources[] = { [0] = { - .name = "m66592_udc", + .name = "USBF", .start = 0x04480000, .end = 0x044800FF, .flags = IORESOURCE_MEM, @@ -67,6 +72,7 @@ static struct platform_device usbf_device = { .dev = { .dma_mask = NULL, .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(usbf_resources), .resource = usbf_resources, diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7f8e83a954ac..b7f10bc25c2c 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -360,16 +360,6 @@ config USB_M66592 default USB_GADGET select USB_GADGET_SELECTED -config SUPERH_BUILT_IN_M66592 - boolean "Enable SuperH built-in USB like the M66592" - depends on USB_GADGET_M66592 && CPU_SUBTYPE_SH7722 - help - SH7722 has USB like the M66592. - - The transfer rate is very slow when use "Ethernet Gadget". - However, this problem is improved if change a value of - NET_IP_ALIGN to 4. 
- # # Controllers available only in discrete form (and all PCI controllers) # diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c index 0dddd2f8ff35..a61c70caff12 100644 --- a/drivers/usb/gadget/m66592-udc.c +++ b/drivers/usb/gadget/m66592-udc.c @@ -31,38 +31,12 @@ #include "m66592-udc.h" - MODULE_DESCRIPTION("M66592 USB gadget driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yoshihiro Shimoda"); MODULE_ALIAS("platform:m66592_udc"); -#define DRIVER_VERSION "26 Jun 2009" - -/* module parameters */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) -static unsigned short endian = M66592_LITTLE; -module_param(endian, ushort, 0644); -MODULE_PARM_DESC(endian, "data endian: big=0, little=0 (default=0)"); -#else -static unsigned short clock = M66592_XTAL24; -module_param(clock, ushort, 0644); -MODULE_PARM_DESC(clock, "input clock: 48MHz=32768, 24MHz=16384, 12MHz=0 " - "(default=16384)"); - -static unsigned short vif = M66592_LDRV; -module_param(vif, ushort, 0644); -MODULE_PARM_DESC(vif, "input VIF: 3.3V=32768, 1.5V=0 (default=32768)"); - -static unsigned short endian; -module_param(endian, ushort, 0644); -MODULE_PARM_DESC(endian, "data endian: big=256, little=0 (default=0)"); - -static unsigned short irq_sense = M66592_INTL; -module_param(irq_sense, ushort, 0644); -MODULE_PARM_DESC(irq_sense, "IRQ sense: low level=2, falling edge=0 " - "(default=2)"); -#endif +#define DRIVER_VERSION "21 July 2009" static const char udc_name[] = "m66592_udc"; static const char *m66592_ep_name[] = { @@ -244,6 +218,7 @@ static inline int get_buffer_size(struct m66592 *m66592, u16 pipenum) static inline void pipe_change(struct m66592 *m66592, u16 pipenum) { struct m66592_ep *ep = m66592->pipenum2ep[pipenum]; + unsigned short mbw; if (ep->use_dma) return; @@ -252,7 +227,12 @@ static inline void pipe_change(struct m66592 *m66592, u16 pipenum) ndelay(450); - m66592_bset(m66592, M66592_MBW, ep->fifosel); + if (m66592->pdata->on_chip) + mbw = M66592_MBW_32; + else + mbw = M66592_MBW_16; + + m66592_bset(m66592, mbw, ep->fifosel); } static int pipe_buffer_setting(struct m66592 *m66592, @@ -332,6 +312,7 @@ static void pipe_buffer_release(struct m66592 *m66592, static void pipe_initialize(struct m66592_ep *ep) { struct m66592 *m66592 = ep->m66592; + unsigned short mbw; m66592_mdfy(m66592, 0, M66592_CURPIPE, ep->fifosel); @@ -343,7 +324,12 @@ static void pipe_initialize(struct m66592_ep *ep) ndelay(450); - m66592_bset(m66592, M66592_MBW, ep->fifosel); + if (m66592->pdata->on_chip) + mbw = M66592_MBW_32; + else + mbw = M66592_MBW_16; + + m66592_bset(m66592, mbw, ep->fifosel); } } @@ -359,15 +345,13 @@ static void m66592_ep_setting(struct m66592 *m66592, struct m66592_ep *ep, ep->fifosel = M66592_D0FIFOSEL; ep->fifoctr = M66592_D0FIFOCTR; ep->fifotrn = M66592_D0FIFOTRN; -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) - } else if (m66592->num_dma == 1) { + } else if (!m66592->pdata->on_chip && m66592->num_dma == 1) { m66592->num_dma++; ep->use_dma = 1; ep->fifoaddr = M66592_D1FIFO; ep->fifosel = M66592_D1FIFOSEL; ep->fifoctr = M66592_D1FIFOCTR; ep->fifotrn = M66592_D1FIFOTRN; -#endif } else { ep->use_dma = 0; ep->fifoaddr = M66592_CFIFO; @@ -612,76 +596,120 @@ static void start_ep0(struct m66592_ep *ep, struct m66592_request *req) } } -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) static void init_controller(struct m66592 *m66592) { - m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ - m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); - 
m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); + unsigned int endian; - /* This is a workaound for SH7722 2nd cut */ - m66592_bset(m66592, 0x8000, M66592_DVSTCTR); - m66592_bset(m66592, 0x1000, M66592_TESTMODE); - m66592_bclr(m66592, 0x8000, M66592_DVSTCTR); + if (m66592->pdata->on_chip) { + if (m66592->pdata->endian) + endian = 0; /* big endian */ + else + endian = M66592_LITTLE; /* little endian */ - m66592_bset(m66592, M66592_INTL, M66592_INTENB1); + m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ + m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); + m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); - m66592_write(m66592, 0, M66592_CFBCFG); - m66592_write(m66592, 0, M66592_D0FBCFG); - m66592_bset(m66592, endian, M66592_CFBCFG); - m66592_bset(m66592, endian, M66592_D0FBCFG); -} -#else /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ -static void init_controller(struct m66592 *m66592) -{ - m66592_bset(m66592, (vif & M66592_LDRV) | (endian & M66592_BIGEND), - M66592_PINCFG); - m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ - m66592_mdfy(m66592, clock & M66592_XTAL, M66592_XTAL, M66592_SYSCFG); + /* This is a workaound for SH7722 2nd cut */ + m66592_bset(m66592, 0x8000, M66592_DVSTCTR); + m66592_bset(m66592, 0x1000, M66592_TESTMODE); + m66592_bclr(m66592, 0x8000, M66592_DVSTCTR); - m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); - m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bset(m66592, M66592_INTL, M66592_INTENB1); + + m66592_write(m66592, 0, M66592_CFBCFG); + m66592_write(m66592, 0, M66592_D0FBCFG); + m66592_bset(m66592, endian, M66592_CFBCFG); + m66592_bset(m66592, endian, M66592_D0FBCFG); + } else { + unsigned int clock, vif, irq_sense; + + if (m66592->pdata->endian) + endian = M66592_BIGEND; /* big endian */ + else + endian = 0; /* little endian */ + + if (m66592->pdata->vif) + vif = M66592_LDRV; /* 3.3v */ + else + vif = 0; /* 1.5v */ + + switch (m66592->pdata->xtal) { + case M66592_PLATDATA_XTAL_12MHZ: + clock = M66592_XTAL12; + break; + case M66592_PLATDATA_XTAL_24MHZ: + clock = M66592_XTAL24; + break; + case M66592_PLATDATA_XTAL_48MHZ: + clock = M66592_XTAL48; + break; + default: + pr_warning("m66592-udc: xtal configuration error\n"); + clock = 0; + } - m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + switch (m66592->irq_trigger) { + case IRQF_TRIGGER_LOW: + irq_sense = M66592_INTL; + break; + case IRQF_TRIGGER_FALLING: + irq_sense = 0; + break; + default: + pr_warning("m66592-udc: irq trigger config error\n"); + irq_sense = 0; + } - msleep(3); + m66592_bset(m66592, + (vif & M66592_LDRV) | (endian & M66592_BIGEND), + M66592_PINCFG); + m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ + m66592_mdfy(m66592, clock & M66592_XTAL, M66592_XTAL, + M66592_SYSCFG); + m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); + m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bset(m66592, M66592_RCKE | M66592_PLLC, M66592_SYSCFG); + m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + + msleep(3); - msleep(1); + m66592_bset(m66592, M66592_RCKE | M66592_PLLC, M66592_SYSCFG); - m66592_bset(m66592, M66592_SCKE, M66592_SYSCFG); + msleep(1); - m66592_bset(m66592, irq_sense & M66592_INTL, M66592_INTENB1); - m66592_write(m66592, M66592_BURST | M66592_CPU_ADR_RD_WR, - M66592_DMA0CFG); + m66592_bset(m66592, M66592_SCKE, M66592_SYSCFG); + + m66592_bset(m66592, irq_sense & M66592_INTL, 
M66592_INTENB1); + m66592_write(m66592, M66592_BURST | M66592_CPU_ADR_RD_WR, + M66592_DMA0CFG); + } } -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ static void disable_controller(struct m66592 *m66592) { -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) - m66592_bclr(m66592, M66592_SCKE, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_PLLC, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_RCKE, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_XCKE, M66592_SYSCFG); -#endif + if (!m66592->pdata->on_chip) { + m66592_bclr(m66592, M66592_SCKE, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_PLLC, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_RCKE, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_XCKE, M66592_SYSCFG); + } } static void m66592_start_xclock(struct m66592 *m66592) { -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) u16 tmp; - tmp = m66592_read(m66592, M66592_SYSCFG); - if (!(tmp & M66592_XCKE)) - m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); -#endif + if (!m66592->pdata->on_chip) { + tmp = m66592_read(m66592, M66592_SYSCFG); + if (!(tmp & M66592_XCKE)) + m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + } } /*-------------------------------------------------------------------------*/ @@ -1169,8 +1197,7 @@ static irqreturn_t m66592_irq(int irq, void *_m66592) intsts0 = m66592_read(m66592, M66592_INTSTS0); intenb0 = m66592_read(m66592, M66592_INTENB0); -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - if (!intsts0 && !intenb0) { + if (m66592->pdata->on_chip && !intsts0 && !intenb0) { /* * When USB clock stops, it cannot read register. Even if a * clock stops, the interrupt occurs. So this driver turn on @@ -1180,7 +1207,6 @@ static irqreturn_t m66592_irq(int irq, void *_m66592) intsts0 = m66592_read(m66592, M66592_INTSTS0); intenb0 = m66592_read(m66592, M66592_INTENB0); } -#endif savepipe = m66592_read(m66592, M66592_CFIFOSEL); @@ -1526,9 +1552,11 @@ static int __exit m66592_remove(struct platform_device *pdev) iounmap(m66592->reg); free_irq(platform_get_irq(pdev, 0), m66592); m66592_free_request(&m66592->ep[0].ep, m66592->ep0_req); -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - clk_disable(m66592->clk); - clk_put(m66592->clk); +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + clk_disable(m66592->clk); + clk_put(m66592->clk); + } #endif kfree(m66592); return 0; @@ -1540,11 +1568,10 @@ static void nop_completion(struct usb_ep *ep, struct usb_request *r) static int __init m66592_probe(struct platform_device *pdev) { - struct resource *res; - int irq; + struct resource *res, *ires; void __iomem *reg = NULL; struct m66592 *m66592 = NULL; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK char clk_name[8]; #endif int ret = 0; @@ -1557,10 +1584,11 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } - irq = platform_get_irq(pdev, 0); - if (irq < 0) { + ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!ires) { ret = -ENODEV; - pr_err("platform_get_irq error.\n"); + dev_err(&pdev->dev, + "platform_get_resource IORESOURCE_IRQ error.\n"); goto clean_up; } @@ -1571,6 +1599,12 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } + if (pdev->dev.platform_data == NULL) { + dev_err(&pdev->dev, "no platform data\n"); + ret = -ENODEV; + goto clean_up; + } + /* initialize ucd */ m66592 = kzalloc(sizeof(struct m66592), GFP_KERNEL); if (m66592 == NULL) { @@ -1578,6 +1612,9 @@ static int __init 
m66592_probe(struct platform_device *pdev) goto clean_up; } + m66592->pdata = pdev->dev.platform_data; + m66592->irq_trigger = ires->flags & IRQF_TRIGGER_MASK; + spin_lock_init(&m66592->lock); dev_set_drvdata(&pdev->dev, m66592); @@ -1595,22 +1632,25 @@ static int __init m66592_probe(struct platform_device *pdev) m66592->timer.data = (unsigned long)m66592; m66592->reg = reg; - ret = request_irq(irq, m66592_irq, IRQF_DISABLED | IRQF_SHARED, + ret = request_irq(ires->start, m66592_irq, IRQF_DISABLED | IRQF_SHARED, udc_name, m66592); if (ret < 0) { pr_err("request_irq error (%d)\n", ret); goto clean_up; } -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - snprintf(clk_name, sizeof(clk_name), "usbf%d", pdev->id); - m66592->clk = clk_get(&pdev->dev, clk_name); - if (IS_ERR(m66592->clk)) { - dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); - ret = PTR_ERR(m66592->clk); - goto clean_up2; +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + snprintf(clk_name, sizeof(clk_name), "usbf%d", pdev->id); + m66592->clk = clk_get(&pdev->dev, clk_name); + if (IS_ERR(m66592->clk)) { + dev_err(&pdev->dev, "cannot get clock \"%s\"\n", + clk_name); + ret = PTR_ERR(m66592->clk); + goto clean_up2; + } + clk_enable(m66592->clk); } - clk_enable(m66592->clk); #endif INIT_LIST_HEAD(&m66592->gadget.ep_list); m66592->gadget.ep0 = &m66592->ep[0].ep; @@ -1652,12 +1692,14 @@ static int __init m66592_probe(struct platform_device *pdev) return 0; clean_up3: -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - clk_disable(m66592->clk); - clk_put(m66592->clk); +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + clk_disable(m66592->clk); + clk_put(m66592->clk); + } clean_up2: #endif - free_irq(irq, m66592); + free_irq(ires->start, m66592); clean_up: if (m66592) { if (m66592->ep0_req) diff --git a/drivers/usb/gadget/m66592-udc.h b/drivers/usb/gadget/m66592-udc.h index 9a9c2bf9fbd5..8b960deed680 100644 --- a/drivers/usb/gadget/m66592-udc.h +++ b/drivers/usb/gadget/m66592-udc.h @@ -23,10 +23,12 @@ #ifndef __M66592_UDC_H__ #define __M66592_UDC_H__ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK #include #endif +#include + #define M66592_SYSCFG 0x00 #define M66592_XTAL 0xC000 /* b15-14: Crystal selection */ #define M66592_XTAL48 0x8000 /* 48MHz */ @@ -76,11 +78,11 @@ #define M66592_P_TST_J 0x0001 /* PERI TEST J */ #define M66592_P_TST_NORMAL 0x0000 /* PERI Normal Mode */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) +/* built-in registers */ #define M66592_CFBCFG 0x0A #define M66592_D0FBCFG 0x0C #define M66592_LITTLE 0x0100 /* b8: Little endian mode */ -#else +/* external chip case */ #define M66592_PINCFG 0x0A #define M66592_LDRV 0x8000 /* b15: Drive Current Adjust */ #define M66592_BIGEND 0x0100 /* b8: Big endian mode */ @@ -100,8 +102,8 @@ #define M66592_PKTM 0x0020 /* b5: Packet mode */ #define M66592_DENDE 0x0010 /* b4: Dend enable */ #define M66592_OBUS 0x0004 /* b2: OUTbus mode */ -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ +/* common case */ #define M66592_CFIFO 0x10 #define M66592_D0FIFO 0x14 #define M66592_D1FIFO 0x18 @@ -113,13 +115,9 @@ #define M66592_REW 0x4000 /* b14: Buffer rewind */ #define M66592_DCLRM 0x2000 /* b13: DMA buffer clear mode */ #define M66592_DREQE 0x1000 /* b12: DREQ output enable */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) -#define M66592_MBW 0x0800 /* b11: Maximum bit width for FIFO */ -#else -#define M66592_MBW 0x0400 /* b10: Maximum bit width for FIFO */ -#define 
M66592_MBW_8 0x0000 /* 8bit */ -#define M66592_MBW_16 0x0400 /* 16bit */ -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ +#define M66592_MBW_8 0x0000 /* 8bit */ +#define M66592_MBW_16 0x0400 /* 16bit */ +#define M66592_MBW_32 0x0800 /* 32bit */ #define M66592_TRENB 0x0200 /* b9: Transaction counter enable */ #define M66592_TRCLR 0x0100 /* b8: Transaction counter clear */ #define M66592_DEZPM 0x0080 /* b7: Zero-length packet mode */ @@ -480,9 +478,11 @@ struct m66592_ep { struct m66592 { spinlock_t lock; void __iomem *reg; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK struct clk *clk; #endif + struct m66592_platdata *pdata; + unsigned long irq_trigger; struct usb_gadget gadget; struct usb_gadget_driver *driver; @@ -546,13 +546,13 @@ static inline void m66592_read_fifo(struct m66592 *m66592, { unsigned long fifoaddr = (unsigned long)m66592->reg + offset; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - len = (len + 3) / 4; - insl(fifoaddr, buf, len); -#else - len = (len + 1) / 2; - insw(fifoaddr, buf, len); -#endif + if (m66592->pdata->on_chip) { + len = (len + 3) / 4; + insl(fifoaddr, buf, len); + } else { + len = (len + 1) / 2; + insw(fifoaddr, buf, len); + } } static inline void m66592_write(struct m66592 *m66592, u16 val, @@ -566,33 +566,34 @@ static inline void m66592_write_fifo(struct m66592 *m66592, void *buf, unsigned long len) { unsigned long fifoaddr = (unsigned long)m66592->reg + offset; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - unsigned long count; - unsigned char *pb; - int i; - - count = len / 4; - outsl(fifoaddr, buf, count); - - if (len & 0x00000003) { - pb = buf + count * 4; - for (i = 0; i < (len & 0x00000003); i++) { - if (m66592_read(m66592, M66592_CFBCFG)) /* little */ - outb(pb[i], fifoaddr + (3 - i)); - else - outb(pb[i], fifoaddr + i); + + if (m66592->pdata->on_chip) { + unsigned long count; + unsigned char *pb; + int i; + + count = len / 4; + outsl(fifoaddr, buf, count); + + if (len & 0x00000003) { + pb = buf + count * 4; + for (i = 0; i < (len & 0x00000003); i++) { + if (m66592_read(m66592, M66592_CFBCFG)) /* le */ + outb(pb[i], fifoaddr + (3 - i)); + else + outb(pb[i], fifoaddr + i); + } + } + } else { + unsigned long odd = len & 0x0001; + + len = len / 2; + outsw(fifoaddr, buf, len); + if (odd) { + unsigned char *p = buf + len*2; + outb(*p, fifoaddr); } } -#else - unsigned long odd = len & 0x0001; - - len = len / 2; - outsw(fifoaddr, buf, len); - if (odd) { - unsigned char *p = buf + len*2; - outb(*p, fifoaddr); - } -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ } static inline void m66592_mdfy(struct m66592 *m66592, u16 val, u16 pat, diff --git a/include/linux/usb/m66592.h b/include/linux/usb/m66592.h new file mode 100644 index 000000000000..cda9625e7df0 --- /dev/null +++ b/include/linux/usb/m66592.h @@ -0,0 +1,44 @@ +/* + * M66592 driver platform data + * + * Copyright (C) 2009 Renesas Solutions Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __LINUX_USB_M66592_H +#define __LINUX_USB_M66592_H + +#define M66592_PLATDATA_XTAL_12MHZ 0x01 +#define M66592_PLATDATA_XTAL_24MHZ 0x02 +#define M66592_PLATDATA_XTAL_48MHZ 0x03 + +struct m66592_platdata { + /* one = on chip controller, zero = external controller */ + unsigned on_chip:1; + + /* one = big endian, zero = little endian */ + unsigned endian:1; + + /* (external controller only) M66592_PLATDATA_XTAL_nnMHZ */ + unsigned xtal:2; + + /* (external controller only) one = 3.3V, zero = 1.5V */ + unsigned vif:1; + +}; + +#endif /* __LINUX_USB_M66592_H */ + -- cgit v1.2.3 From 0c193054a4c1cf190d2f23e5e91bd14402e43912 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 27 Jul 2009 19:09:19 -0400 Subject: nfsd41: change from page to memory based drc limits NFSD_SLOT_CACHE_SIZE is the size of all encoded operation responses (excluding the sequence operation) that we want to cache. For now, keep NFSD_SLOT_CACHE_SIZE at PAGE_SIZE. It will be reduced when the DRC is changed from page based to memory based. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 28 +++++++++++++--------------- fs/nfsd/nfssvc.c | 13 ++++++------- include/linux/nfsd/nfsd.h | 4 ++-- include/linux/nfsd/state.h | 1 + 4 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 70cba3fbfa6d..e2b11b1b515c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -414,31 +414,31 @@ gen_sessionid(struct nfsd4_session *ses) /* * Give the client the number of slots it requests bound by - * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. + * NFSD_MAX_SLOTS_PER_SESSION and by nfsd_drc_max_mem. * - * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we - * should (up to a point) re-negotiate active sessions and reduce their - * slot usage to make rooom for new connections. For now we just fail the - * create session. + * If we run out of reserved DRC memory we should (up to a point) re-negotiate + * active sessions and reduce their slot usage to make rooom for new + * connections. For now we just fail the create session.
*/ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) { - int np; + int mem; if (fchan->maxreqs < 1) return nfserr_inval; else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; + mem = fchan->maxreqs * NFSD_SLOT_CACHE_SIZE; spin_lock(&nfsd_drc_lock); - if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) - np = nfsd_drc_max_pages - nfsd_drc_pages_used; - nfsd_drc_pages_used += np; + if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) + mem = ((nfsd_drc_max_mem - nfsd_drc_mem_used) / + NFSD_SLOT_CACHE_SIZE) * NFSD_SLOT_CACHE_SIZE; + nfsd_drc_mem_used += mem; spin_unlock(&nfsd_drc_lock); - fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; + fchan->maxreqs = mem / NFSD_SLOT_CACHE_SIZE; if (fchan->maxreqs == 0) return nfserr_resource; return 0; @@ -465,9 +465,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, fchan->maxresp_sz = maxcount; session_fchan->maxresp_sz = fchan->maxresp_sz; - /* Set the max response cached size our default which is - * a multiple of PAGE_SIZE and small */ - session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; + session_fchan->maxresp_cached = NFSD_SLOT_CACHE_SIZE; fchan->maxresp_cached = session_fchan->maxresp_cached; /* Use the client's maxops if possible */ @@ -585,7 +583,7 @@ free_session(struct kref *kref) nfsd4_release_respages(e->ce_respages, e->ce_resused); } spin_lock(&nfsd_drc_lock); - nfsd_drc_pages_used -= ses->se_fchannel.maxreqs * NFSD_PAGES_PER_SLOT; + nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; spin_unlock(&nfsd_drc_lock); kfree(ses); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9be2a1932f8a..5a280a9cb540 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -74,8 +74,8 @@ struct svc_serv *nfsd_serv; * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
*/ spinlock_t nfsd_drc_lock; -unsigned int nfsd_drc_max_pages; -unsigned int nfsd_drc_pages_used; +unsigned int nfsd_drc_max_mem; +unsigned int nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; @@ -247,12 +247,11 @@ void nfsd_reset_versions(void) static void set_max_drc(void) { #define NFSD_DRC_SIZE_SHIFT 10 - nfsd_drc_max_pages = nr_free_buffer_pages() - >> NFSD_DRC_SIZE_SHIFT; - nfsd_drc_pages_used = 0; + nfsd_drc_max_mem = (nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; + nfsd_drc_mem_used = 0; spin_lock_init(&nfsd_drc_lock); - dprintk("%s nfsd_drc_max_pages %u\n", __func__, - nfsd_drc_max_pages); + dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2571f856908f..2812ed52669d 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -57,8 +57,8 @@ extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; extern spinlock_t nfsd_drc_lock; -extern unsigned int nfsd_drc_max_pages; -extern unsigned int nfsd_drc_pages_used; +extern unsigned int nfsd_drc_max_mem; +extern unsigned int nfsd_drc_mem_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 57ab2ed08459..a6c87d623891 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -96,6 +96,7 @@ struct nfs4_cb_conn { #define NFSD_MAX_SLOTS_PER_SESSION 128 /* Maximum number of pages per slot cache entry */ #define NFSD_PAGES_PER_SLOT 1 +#define NFSD_SLOT_CACHE_SIZE PAGE_SIZE /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 -- cgit v1.2.3 From 49557cc74c7bdf6a984be227ead9a84b3a26f053 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 23 Jul 2009 19:02:16 -0400 Subject: nfsd41: Use separate DRC for setclientid Instead of trying to share the generic 4.1 reply cache code for the CREATE_SESSION reply cache, it's simpler to handle CREATE_SESSION separately. The nfs41 single slot clientid DRC holds the results of create session processing. CREATE_SESSION can be preceded by a SEQUENCE operation (an embedded CREATE_SESSION) and the create session single slot cache must be maintained. nfsd4_replay_cache_entry() and nfsd4_store_cache_entry() do not implement the replay of an embedded CREATE_SESSION. The clientid DRC slot does not need the inuse, cachethis or other fields that the multiple slot session cache uses. Replace the clientid DRC cache struct nfs4_slot cache with a new nfsd4_clid_slot cache. Save the xdr struct nfsd4_create_session into the cache at the end of processing, and on a replay, replace the struct for the replay request with the cached version all while under the state lock. nfsd4_proc_compound will handle both the solo and embedded CREATE_SESSION case via the normal use of encode_operation. Errors that do not change the create session cache: A create session NFS4ERR_STALE_CLIENTID error means that a client record (and associated create session slot) could not be found and therefore can't be changed. NFSERR_SEQ_MISORDERED errors do not change the slot cache. All other errors get cached. Remove the clientid DRC specific check in nfs4svc_encode_compoundres to put the session only if cstate.session is set, which will now always be true. Signed-off-by: Andy Adamson Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++------------------- fs/nfsd/nfs4xdr.c | 3 +-- include/linux/nfsd/state.h | 21 ++++++++++++++- include/linux/nfsd/xdr4.h | 12 --------- 5 files changed, 60 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d781658e8084..d606c6a427de 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1120,7 +1120,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, BUG_ON(op->status == nfs_ok); encode_op: - /* Only from SEQUENCE or CREATE_SESSION */ + /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); if (nfsd4_not_cached(resp)) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 99df8e7a687b..7729d092c8a5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -653,8 +653,6 @@ static inline void free_client(struct nfs4_client *clp) { shutdown_callback_client(clp); - nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, - clp->cl_slot.sl_cache_entry.ce_resused); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -1293,12 +1291,11 @@ out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - new->cl_slot.sl_seqid = 0; exid->seqid = 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_slot.sl_seqid, new->cl_exchange_flags); + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); status = nfs_ok; out: @@ -1334,15 +1331,35 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) return nfserr_seq_misordered; } +/* + * Cache the create session result into the create session single DRC + * slot cache by saving the xdr structure. sl_seqid has been set. + * Do this for solo or embedded create session operations. + */ +static void +nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot, int nfserr) +{ + slot->sl_status = nfserr; + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); +} + +static __be32 +nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot) +{ + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); + return slot->sl_status; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; - struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *conf, *unconf; - struct nfsd4_slot *slot = NULL; + struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; nfs4_lock_state(); @@ -1350,25 +1367,22 @@ nfsd4_create_session(struct svc_rqst *rqstp, conf = find_confirmed_client(&cr_ses->clientid); if (conf) { - slot = &conf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &conf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { dprintk("Got a create_session replay! 
seqid= %d\n", - slot->sl_seqid); - cstate->slot = slot; - cstate->status = status; + cs_slot->sl_seqid); /* Return the cached reply status */ - status = nfsd4_replay_cache_entry(resp, NULL); + status = nfsd4_replay_create_session(cr_ses, cs_slot); goto out; - } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { status = nfserr_seq_misordered; dprintk("Sequence misordered!\n"); dprintk("Expected seqid= %d but got seqid= %d\n", - slot->sl_seqid, cr_ses->seqid); + cs_slot->sl_seqid, cr_ses->seqid); goto out; } - conf->cl_slot.sl_seqid++; + cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || (ip_addr != unconf->cl_addr)) { @@ -1376,16 +1390,15 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } - slot = &unconf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &unconf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out; + goto out_cache; } - slot->sl_seqid++; /* from 0 to 1 */ + cs_slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); /* @@ -1406,12 +1419,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - cr_ses->seqid = slot->sl_seqid; + cr_ses->seqid = cs_slot->sl_seqid; - slot->sl_inuse = true; - cstate->slot = slot; - /* Ensure a page is used for the cache */ - slot->sl_cache_entry.ce_cachethis = 1; +out_cache: + /* cache solo and embedded create sessions under the state lock */ + nfsd4_cache_create_session(cr_ses, cs_slot, status); out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2dcc7feaa6ff..fdf632bf1cfe 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3313,8 +3313,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); resp->cstate.slot->sl_inuse = 0; } - if (resp->cstate.session) - nfsd4_put_session(resp->cstate.session); + nfsd4_put_session(resp->cstate.session); } return 1; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a6c87d623891..58bb19784e12 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -127,6 +127,25 @@ struct nfsd4_channel_attrs { u32 rdma_attrs; }; +struct nfsd4_create_session { + clientid_t clientid; + struct nfs4_sessionid sessionid; + u32 seqid; + u32 flags; + struct nfsd4_channel_attrs fore_channel; + struct nfsd4_channel_attrs back_channel; + u32 callback_prog; + u32 uid; + u32 gid; +}; + +/* The single slot clientid cache structure */ +struct nfsd4_clid_slot { + u32 sl_seqid; + __be32 sl_status; + struct nfsd4_create_session sl_cr_ses; +}; + struct nfsd4_session { struct kref se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -193,7 +212,7 @@ struct nfs4_client { /* for nfs41 */ struct list_head cl_sessions; - struct nfsd4_slot cl_slot; /* create_session slot */ + struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; struct nfs4_sessionid cl_sessionid; }; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 2bacf7535069..5e4beb0deb80 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -366,18 +366,6 @@ struct nfsd4_exchange_id { int spa_how; }; -struct nfsd4_create_session 
{ - clientid_t clientid; - struct nfs4_sessionid sessionid; - u32 seqid; - u32 flags; - struct nfsd4_channel_attrs fore_channel; - struct nfsd4_channel_attrs back_channel; - u32 callback_prog; - u32 uid; - u32 gid; -}; - struct nfsd4_sequence { struct nfs4_sessionid sessionid; /* request/response */ u32 seqid; /* request/response */ -- cgit v1.2.3 From e5f5ccb646bc6009572b5c23201b5e81638ff150 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 23 Jul 2009 20:35:53 +0200 Subject: power_supply: get_by_name and set_charged functionality This adds a function that indicates that a battery is fully charged. It also includes functions to get a power_supply device from the class of registered devices by name reference. These can be used to find a specific battery to call power_supply_set_battery_charged() on. Some battery drivers might need this information to calibrate themselves. Signed-off-by: Daniel Mack Cc: Ian Molton Cc: Anton Vorontsov Cc: Matt Reimer Signed-off-by: Anton Vorontsov --- drivers/power/power_supply_core.c | 28 ++++++++++++++++++++++++++++ include/linux/power_supply.h | 3 +++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 12cd6e36ff1d..cce75b40b435 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -116,6 +116,34 @@ int power_supply_is_system_supplied(void) } EXPORT_SYMBOL_GPL(power_supply_is_system_supplied); +int power_supply_set_battery_charged(struct power_supply *psy) +{ + if (psy->type == POWER_SUPPLY_TYPE_BATTERY && psy->set_charged) { + psy->set_charged(psy); + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(power_supply_set_battery_charged); + +static int power_supply_match_device_by_name(struct device *dev, void *data) +{ + const char *name = data; + struct power_supply *psy = dev_get_drvdata(dev); + + return strcmp(psy->name, name) == 0; +} + +struct power_supply *power_supply_get_by_name(char *name) +{ + struct device *dev = class_find_device(power_supply_class, NULL, name, + power_supply_match_device_by_name); + + return dev ? dev_get_drvdata(dev) : NULL; +} +EXPORT_SYMBOL_GPL(power_supply_get_by_name); + int power_supply_register(struct device *parent, struct power_supply *psy) { int rc = 0; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4c7c6fc35487..b5d096d3a9be 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -144,6 +144,7 @@ struct power_supply { enum power_supply_property psp, union power_supply_propval *val); void (*external_power_changed)(struct power_supply *psy); + void (*set_charged)(struct power_supply *psy); /* For APM emulation, think legacy userspace. */ int use_for_apm; @@ -183,8 +184,10 @@ struct power_supply_info { int use_for_apm; }; +extern struct power_supply *power_supply_get_by_name(char *name); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); +extern int power_supply_set_battery_charged(struct power_supply *psy); #if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE) extern int power_supply_is_system_supplied(void); -- cgit v1.2.3 From ff663cf8705bea101d5f73cf471855c85242575e Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 23 Jul 2009 17:25:49 +0100 Subject: agp: Add generic support for graphics dma remapping New driver hooks to support graphics memory dma remapping are introduced in this patch.
It lets generic code tell whether the current device needs dma remapping, and then call the driver-provided interfaces for mapping and unmapping. A change has also been made to handle the scratch_page in the remapping case. Signed-off-by: Zhenyu Wang Signed-off-by: David Woodhouse --- drivers/char/agp/agp.h | 6 ++++++ drivers/char/agp/backend.c | 20 ++++++++++++++++++++ drivers/char/agp/generic.c | 9 +++++++++ include/linux/agp_backend.h | 6 +++++- 4 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index ce110a3bf298..17e6d0d3ba36 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -121,6 +121,11 @@ struct agp_bridge_driver { void (*agp_destroy_pages)(struct agp_memory *); int (*agp_type_to_mask_type) (struct agp_bridge_data *, int); void (*chipset_flush)(struct agp_bridge_data *); + + int (*agp_map_page)(void *addr, dma_addr_t *ret); + void (*agp_unmap_page)(void *addr, dma_addr_t dma); + int (*agp_map_memory)(struct agp_memory *mem); + void (*agp_unmap_memory)(struct agp_memory *mem); }; struct agp_bridge_data { @@ -135,6 +140,7 @@ struct agp_bridge_data { u32 *gatt_table_real; unsigned long scratch_page; unsigned long scratch_page_real; + dma_addr_t scratch_page_dma; unsigned long gart_bus_addr; unsigned long gatt_bus_addr; u32 mode; diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c index 3bd7e503de41..19ac3663acdc 100644 --- a/drivers/char/agp/backend.c +++ b/drivers/char/agp/backend.c @@ -152,6 +152,15 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) bridge->scratch_page_real = phys_to_gart(page_to_phys(page)); bridge->scratch_page = bridge->driver->mask_memory(bridge, phys_to_gart(page_to_phys(page)), 0); + + if (bridge->driver->agp_map_page && + bridge->driver->agp_map_page(phys_to_virt(page_to_phys(page)), + &bridge->scratch_page_dma)) { + dev_err(&bridge->dev->dev, + "unable to dma-map scratch page\n"); + rc = -ENOMEM; + goto err_out_nounmap; + } } size_value = bridge->driver->fetch_size(); @@ -191,6 +200,13 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) return 0; err_out: + if (bridge->driver->needs_scratch_page && + bridge->driver->agp_unmap_page) { + void *va = gart_to_virt(bridge->scratch_page_real); + + bridge->driver->agp_unmap_page(va, bridge->scratch_page_dma); + } +err_out_nounmap: if (bridge->driver->needs_scratch_page) { void *va = gart_to_virt(bridge->scratch_page_real); @@ -221,6 +237,10 @@ static void agp_backend_cleanup(struct agp_bridge_data *bridge) bridge->driver->needs_scratch_page) { void *va = gart_to_virt(bridge->scratch_page_real); + if (bridge->driver->agp_unmap_page) + bridge->driver->agp_unmap_page(va, + bridge->scratch_page_dma); + bridge->driver->agp_destroy_page(va, AGP_PAGE_DESTROY_UNMAP); bridge->driver->agp_destroy_page(va, AGP_PAGE_DESTROY_FREE); } diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index a3bcc7ef42f9..28f0208c66a6 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -437,6 +437,12 @@ int agp_bind_memory(struct agp_memory *curr, off_t pg_start) curr->bridge->driver->cache_flush(); curr->is_flushed = true; } + + if (curr->bridge->driver->agp_map_memory) { + ret_val = curr->bridge->driver->agp_map_memory(curr); + if (ret_val) + return ret_val; + } ret_val = curr->bridge->driver->insert_memory(curr, pg_start, curr->type); if (ret_val != 0) @@ -478,6 +484,9 @@ int agp_unbind_memory(struct agp_memory *curr) if (ret_val != 0) return ret_val; + if
(curr->bridge->driver->agp_unmap_memory) + curr->bridge->driver->agp_unmap_memory(curr); + curr->is_bound = false; curr->pg_start = 0; spin_lock(&curr->bridge->mapped_lock); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 76fa794fdac0..8a294d65b9b1 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -79,9 +79,13 @@ struct agp_memory { u32 physical; bool is_bound; bool is_flushed; - bool vmalloc_flag; + bool vmalloc_flag; + bool sg_vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; + /* DMA-mapped addresses */ + struct scatterlist *sg_list; + int num_sg; }; #define AGP_NORMAL_MEMORY 0 -- cgit v1.2.3 From f692775d7e0a22477143cd884e45c955448ac7d2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 29 Jul 2009 09:28:45 +0100 Subject: intel-agp: fix sglist allocation to avoid vmalloc() Signed-off-by: David Woodhouse --- drivers/char/agp/intel-agp.c | 29 ++++++++++-------------------- include/linux/agp_backend.h | 1 - 2 files changed, 10 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index b9d9886ff3c3..d8c80d8be5e2 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -198,39 +198,30 @@ static void intel_agp_unmap_page(struct page *page, dma_addr_t dma) static void intel_agp_free_sglist(struct agp_memory *mem) { + struct sg_table st; + + st.sgl = mem->sg_list; + st.orig_nents = st.nents = mem->page_count; + + sg_free_table(&st); - if (mem->sg_vmalloc_flag) - vfree(mem->sg_list); - else - kfree(mem->sg_list); - mem->sg_vmalloc_flag = 0; mem->sg_list = NULL; mem->num_sg = 0; } static int intel_agp_map_memory(struct agp_memory *mem) { + struct sg_table st; struct scatterlist *sg; int i; DBG("try mapping %lu pages\n", (unsigned long)mem->page_count); - if ((mem->page_count * sizeof(*mem->sg_list)) < 2*PAGE_SIZE) - mem->sg_list = kcalloc(mem->page_count, sizeof(*mem->sg_list), - GFP_KERNEL); - - if (mem->sg_list == NULL) { - mem->sg_list = vmalloc(mem->page_count * sizeof(*mem->sg_list)); - mem->sg_vmalloc_flag = 1; - } - - if (!mem->sg_list) { - mem->sg_vmalloc_flag = 0; + if (sg_alloc_table(&st, mem->page_count, GFP_KERNEL)) return -ENOMEM; - } - sg_init_table(mem->sg_list, mem->page_count); - sg = mem->sg_list; + mem->sg_list = sg = st.sgl; + for (i = 0 ; i < mem->page_count; i++, sg = sg_next(sg)) sg_set_page(sg, mem->pages[i], PAGE_SIZE, 0); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 8a294d65b9b1..880130f7311f 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -80,7 +80,6 @@ struct agp_memory { bool is_bound; bool is_flushed; bool vmalloc_flag; - bool sg_vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; /* DMA-mapped addresses */ -- cgit v1.2.3 From 42c4ab41a176ee784c0f28c0b29025a8fc34f05a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:26 +0200 Subject: itimers: Merge ITIMER_VIRT and ITIMER_PROF Both CPU itimers have the same data flow in a few places; this patch unifies the code related to the VIRT and PROF itimers.
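For illustration only, here is a minimal userspace sketch of the refactoring pattern this patch applies (the demo_* names are hypothetical and not kernel code): the two parallel prof/virt field pairs collapse into one array indexed by a clock id, so a single helper can replace two nearly identical branches.

#include <stdio.h>

/* mirrors CPUCLOCK_PROF = 0 and CPUCLOCK_VIRT = 1 used as array indexes */
enum { DEMO_CLOCK_PROF = 0, DEMO_CLOCK_VIRT = 1 };

struct demo_itimer {
	unsigned long expires;	/* next expiry, in ticks */
	unsigned long incr;	/* reload interval, in ticks */
};

/* one array replaces the it_prof_expires/incr and it_virt_expires/incr pairs */
static struct demo_itimer demo_it[2];

/* a single helper serves both timers instead of two copied switch arms */
static void demo_set(int clock_id, unsigned long value, unsigned long interval)
{
	demo_it[clock_id].expires = value;
	demo_it[clock_id].incr = interval;
}

int main(void)
{
	demo_set(DEMO_CLOCK_PROF, 100, 10);
	demo_set(DEMO_CLOCK_VIRT, 200, 20);
	printf("prof: expires=%lu incr=%lu\n",
	       demo_it[DEMO_CLOCK_PROF].expires, demo_it[DEMO_CLOCK_PROF].incr);
	printf("virt: expires=%lu incr=%lu\n",
	       demo_it[DEMO_CLOCK_VIRT].expires, demo_it[DEMO_CLOCK_VIRT].incr);
	return 0;
}
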
Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-2-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 14 ++++- kernel/fork.c | 9 +-- kernel/itimer.c | 146 +++++++++++++++++++++------------------------- kernel/posix-cpu-timers.c | 98 +++++++++++++++---------------- 4 files changed, 130 insertions(+), 137 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4bb6b8..3b3efaddd953 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -470,6 +470,11 @@ struct pacct_struct { unsigned long ac_minflt, ac_majflt; }; +struct cpu_itimer { + cputime_t expires; + cputime_t incr; +}; + /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in &cputime_t units @@ -564,9 +569,12 @@ struct signal_struct { struct pid *leader_pid; ktime_t it_real_incr; - /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ - cputime_t it_prof_expires, it_virt_expires; - cputime_t it_prof_incr, it_virt_incr; + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; /* * Thread group totals for process CPU timers. diff --git a/kernel/fork.c b/kernel/fork.c index 29b532e718f7..893ab0bf5e39 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -790,10 +791,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) thread_group_cputime_init(sig); /* Expiration times and increments. */ - sig->it_virt_expires = cputime_zero; - sig->it_virt_incr = cputime_zero; - sig->it_prof_expires = cputime_zero; - sig->it_prof_incr = cputime_zero; + sig->it[CPUCLOCK_PROF].expires = cputime_zero; + sig->it[CPUCLOCK_PROF].incr = cputime_zero; + sig->it[CPUCLOCK_VIRT].expires = cputime_zero; + sig->it[CPUCLOCK_VIRT].incr = cputime_zero; /* Cached expiration times. 
*/ sig->cputime_expires.prof_exp = cputime_zero; diff --git a/kernel/itimer.c b/kernel/itimer.c index 58762f7077ec..852c88ddd1f0 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -41,10 +41,43 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) return ktime_to_timeval(rem); } +static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, + struct itimerval *value) +{ + cputime_t cval, cinterval; + struct cpu_itimer *it = &tsk->signal->it[clock_id]; + + spin_lock_irq(&tsk->sighand->siglock); + + cval = it->expires; + cinterval = it->incr; + if (!cputime_eq(cval, cputime_zero)) { + struct task_cputime cputime; + cputime_t t; + + thread_group_cputimer(tsk, &cputime); + if (clock_id == CPUCLOCK_PROF) + t = cputime_add(cputime.utime, cputime.stime); + else + /* CPUCLOCK_VIRT */ + t = cputime.utime; + + if (cputime_le(cval, t)) + /* about to fire */ + cval = jiffies_to_cputime(1); + else + cval = cputime_sub(cval, t); + } + + spin_unlock_irq(&tsk->sighand->siglock); + + cputime_to_timeval(cval, &value->it_value); + cputime_to_timeval(cinterval, &value->it_interval); +} + int do_getitimer(int which, struct itimerval *value) { struct task_struct *tsk = current; - cputime_t cinterval, cval; switch (which) { case ITIMER_REAL: @@ -55,44 +88,10 @@ int do_getitimer(int which, struct itimerval *value) spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_virt_expires; - cinterval = tsk->signal->it_virt_incr; - if (!cputime_eq(cval, cputime_zero)) { - struct task_cputime cputime; - cputime_t utime; - - thread_group_cputimer(tsk, &cputime); - utime = cputime.utime; - if (cputime_le(cval, utime)) { /* about to fire */ - cval = jiffies_to_cputime(1); - } else { - cval = cputime_sub(cval, utime); - } - } - spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + get_cpu_itimer(tsk, CPUCLOCK_VIRT, value); break; case ITIMER_PROF: - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_prof_expires; - cinterval = tsk->signal->it_prof_incr; - if (!cputime_eq(cval, cputime_zero)) { - struct task_cputime times; - cputime_t ptime; - - thread_group_cputimer(tsk, ×); - ptime = cputime_add(times.utime, times.stime); - if (cputime_le(cval, ptime)) { /* about to fire */ - cval = jiffies_to_cputime(1); - } else { - cval = cputime_sub(cval, ptime); - } - } - spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + get_cpu_itimer(tsk, CPUCLOCK_PROF, value); break; default: return(-EINVAL); @@ -128,6 +127,36 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, + struct itimerval *value, struct itimerval *ovalue) +{ + cputime_t cval, cinterval, nval, ninterval; + struct cpu_itimer *it = &tsk->signal->it[clock_id]; + + nval = timeval_to_cputime(&value->it_value); + ninterval = timeval_to_cputime(&value->it_interval); + + spin_lock_irq(&tsk->sighand->siglock); + + cval = it->expires; + cinterval = it->incr; + if (!cputime_eq(cval, cputime_zero) || + !cputime_eq(nval, cputime_zero)) { + if (cputime_gt(nval, cputime_zero)) + nval = cputime_add(nval, jiffies_to_cputime(1)); + set_process_cpu_timer(tsk, clock_id, &nval, &cval); + } + it->expires = nval; + it->incr = ninterval; + + spin_unlock_irq(&tsk->sighand->siglock); + + if 
(ovalue) { + cputime_to_timeval(cval, &ovalue->it_value); + cputime_to_timeval(cinterval, &ovalue->it_interval); + } +} + /* * Returns true if the timeval is in canonical form */ @@ -139,7 +168,6 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) struct task_struct *tsk = current; struct hrtimer *timer; ktime_t expires; - cputime_t cval, cinterval, nval, ninterval; /* * Validate the timevals in value. @@ -174,48 +202,10 @@ again: spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - nval = timeval_to_cputime(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_virt_expires; - cinterval = tsk->signal->it_virt_incr; - if (!cputime_eq(cval, cputime_zero) || - !cputime_eq(nval, cputime_zero)) { - if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, - jiffies_to_cputime(1)); - set_process_cpu_timer(tsk, CPUCLOCK_VIRT, - &nval, &cval); - } - tsk->signal->it_virt_expires = nval; - tsk->signal->it_virt_incr = ninterval; - spin_unlock_irq(&tsk->sighand->siglock); - if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); - } + set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue); break; case ITIMER_PROF: - nval = timeval_to_cputime(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_prof_expires; - cinterval = tsk->signal->it_prof_incr; - if (!cputime_eq(cval, cputime_zero) || - !cputime_eq(nval, cputime_zero)) { - if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, - jiffies_to_cputime(1)); - set_process_cpu_timer(tsk, CPUCLOCK_PROF, - &nval, &cval); - } - tsk->signal->it_prof_expires = nval; - tsk->signal->it_prof_incr = ninterval; - spin_unlock_irq(&tsk->sighand->siglock); - if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); - } + set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue); break; default: return -EINVAL; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bece7c0b67b2..9b2d5e4dc8c4 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -14,11 +14,11 @@ */ void update_rlimit_cpu(unsigned long rlim_new) { - cputime_t cputime; + cputime_t cputime = secs_to_cputime(rlim_new); + struct signal_struct *const sig = current->signal; - cputime = secs_to_cputime(rlim_new); - if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || - cputime_gt(current->signal->it_prof_expires, cputime)) { + if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || + cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { spin_lock_irq(¤t->sighand->siglock); set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); spin_unlock_irq(¤t->sighand->siglock); @@ -613,6 +613,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) break; } } else { + struct signal_struct *const sig = p->signal; + union cpu_time_count *exp = &timer->it.cpu.expires; + /* * For a process timer, set the cached expiration time. 
*/ @@ -620,30 +623,27 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) default: BUG(); case CPUCLOCK_VIRT: - if (!cputime_eq(p->signal->it_virt_expires, + if (!cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && - cputime_lt(p->signal->it_virt_expires, - timer->it.cpu.expires.cpu)) + cputime_lt(sig->it[CPUCLOCK_VIRT].expires, + exp->cpu)) break; - p->signal->cputime_expires.virt_exp = - timer->it.cpu.expires.cpu; + sig->cputime_expires.virt_exp = exp->cpu; break; case CPUCLOCK_PROF: - if (!cputime_eq(p->signal->it_prof_expires, + if (!cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && - cputime_lt(p->signal->it_prof_expires, - timer->it.cpu.expires.cpu)) + cputime_lt(sig->it[CPUCLOCK_PROF].expires, + exp->cpu)) break; - i = p->signal->rlim[RLIMIT_CPU].rlim_cur; + i = sig->rlim[RLIMIT_CPU].rlim_cur; if (i != RLIM_INFINITY && - i <= cputime_to_secs(timer->it.cpu.expires.cpu)) + i <= cputime_to_secs(exp->cpu)) break; - p->signal->cputime_expires.prof_exp = - timer->it.cpu.expires.cpu; + sig->cputime_expires.prof_exp = exp->cpu; break; case CPUCLOCK_SCHED: - p->signal->cputime_expires.sched_exp = - timer->it.cpu.expires.sched; + sig->cputime_expires.sched_exp = exp->sched; break; } } @@ -1070,6 +1070,27 @@ static void stop_process_timers(struct task_struct *tsk) spin_unlock_irqrestore(&cputimer->lock, flags); } +static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, + cputime_t *expires, cputime_t cur_time, int signo) +{ + if (cputime_eq(it->expires, cputime_zero)) + return; + + if (cputime_ge(cur_time, it->expires)) { + it->expires = it->incr; + if (!cputime_eq(it->expires, cputime_zero)) + it->expires = cputime_add(it->expires, cur_time); + + __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); + } + + if (!cputime_eq(it->expires, cputime_zero) && + (cputime_eq(*expires, cputime_zero) || + cputime_lt(it->expires, *expires))) { + *expires = it->expires; + } +} + /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers @@ -1089,10 +1110,10 @@ static void check_process_timers(struct task_struct *tsk, * Don't sample the current process CPU clocks if there are no timers. */ if (list_empty(&timers[CPUCLOCK_PROF]) && - cputime_eq(sig->it_prof_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && list_empty(&timers[CPUCLOCK_VIRT]) && - cputime_eq(sig->it_virt_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && list_empty(&timers[CPUCLOCK_SCHED])) { stop_process_timers(tsk); return; @@ -1152,38 +1173,11 @@ static void check_process_timers(struct task_struct *tsk, /* * Check for the special case process timers. */ - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - if (cputime_ge(ptime, sig->it_prof_expires)) { - /* ITIMER_PROF fires and reloads. */ - sig->it_prof_expires = sig->it_prof_incr; - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - sig->it_prof_expires = cputime_add( - sig->it_prof_expires, ptime); - } - __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_prof_expires, cputime_zero) && - (cputime_eq(prof_expires, cputime_zero) || - cputime_lt(sig->it_prof_expires, prof_expires))) { - prof_expires = sig->it_prof_expires; - } - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - if (cputime_ge(utime, sig->it_virt_expires)) { - /* ITIMER_VIRTUAL fires and reloads. 
*/ - sig->it_virt_expires = sig->it_virt_incr; - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - sig->it_virt_expires = cputime_add( - sig->it_virt_expires, utime); - } - __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero) && - (cputime_eq(virt_expires, cputime_zero) || - cputime_lt(sig->it_virt_expires, virt_expires))) { - virt_expires = sig->it_virt_expires; - } - } + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime, + SIGPROF); + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, + SIGVTALRM); + if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { unsigned long psecs = cputime_to_secs(ptime); cputime_t x; -- cgit v1.2.3 From 8356b5f9c424e5831715abbce747197c30d1fd71 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:27 +0200 Subject: itimers: Fix periodic ticks precision Measure the ITIMER_PROF and ITIMER_VIRT timer interval error between the real ticks and those requested by the user, and take it into account when scheduling the next tick. This patch introduces the possibility that the time between two consecutive ticks is smaller than the requested interval; however, it preserves the guarantee that the nth tick is generated no earlier than n*interval - counting from the beginning of periodic signal generation. Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-3-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/itimer.c | 24 +++++++++++++++++++++--- kernel/posix-cpu-timers.c | 20 +++++++++++++++++--- 3 files changed, 40 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3b3efaddd953..a069e65e8bb7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -473,6 +473,8 @@ struct pacct_struct { struct cpu_itimer { cputime_t expires; cputime_t incr; + u32 error; + u32 incr_error; }; /** diff --git a/kernel/itimer.c b/kernel/itimer.c index 852c88ddd1f0..21adff7b2a17 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -42,7 +42,7 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) } static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *value) + struct itimerval *const value) { cputime_t cval, cinterval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; @@ -127,14 +127,32 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns) +{ + struct timespec ts; + s64 cpu_ns; + + cputime_to_timespec(ct, &ts); + cpu_ns = timespec_to_ns(&ts); + + return (cpu_ns <= real_ns) ?
0 : cpu_ns - real_ns; +} + static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *value, struct itimerval *ovalue) + const struct itimerval *const value, + struct itimerval *const ovalue) { - cputime_t cval, cinterval, nval, ninterval; + cputime_t cval, nval, cinterval, ninterval; + s64 ns_ninterval, ns_nval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; nval = timeval_to_cputime(&value->it_value); + ns_nval = timeval_to_ns(&value->it_value); ninterval = timeval_to_cputime(&value->it_interval); + ns_ninterval = timeval_to_ns(&value->it_interval); + + it->incr_error = cputime_sub_ns(ninterval, ns_ninterval); + it->error = cputime_sub_ns(nval, ns_nval); spin_lock_irq(&tsk->sighand->siglock); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9b2d5e4dc8c4..b60d644ea4b3 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1070,6 +1070,8 @@ static void stop_process_timers(struct task_struct *tsk) spin_unlock_irqrestore(&cputimer->lock, flags); } +static u32 onecputick; + static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, cputime_t *expires, cputime_t cur_time, int signo) { @@ -1077,9 +1079,16 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, return; if (cputime_ge(cur_time, it->expires)) { - it->expires = it->incr; - if (!cputime_eq(it->expires, cputime_zero)) - it->expires = cputime_add(it->expires, cur_time); + if (!cputime_eq(it->incr, cputime_zero)) { + it->expires = cputime_add(it->expires, it->incr); + it->error += it->incr_error; + if (it->error >= onecputick) { + it->expires = cputime_sub(it->expires, + jiffies_to_cputime(1)); + it->error -= onecputick; + } + } else + it->expires = cputime_zero; __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); } @@ -1696,10 +1705,15 @@ static __init int init_posix_cpu_timers(void) .nsleep = thread_cpu_nsleep, .nsleep_restart = thread_cpu_nsleep_restart, }; + struct timespec ts; register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); + cputime_to_timespec(jiffies_to_cputime(1), &ts); + onecputick = ts.tv_nsec; + WARN_ON(ts.tv_sec != 0); + return 0; } __initcall(init_posix_cpu_timers); -- cgit v1.2.3 From a42548a18866e87092db93b771e6c5b060d78401 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:29 +0200 Subject: cputime: Optimize jiffies_to_cputime(1) For powerpc with CONFIG_VIRT_CPU_ACCOUNTING, jiffies_to_cputime(1) is not a compile-time constant and the run-time calculation is quite expensive. To optimize, we use a precomputed value. For all other architectures it is a preprocessor definition.
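The change reduces to caching one conversion at boot. A minimal sketch of the pattern (illustrative only; the authoritative changes are in the diff below, whose powerpc hook uses the same name):

	cputime_t cputime_one_jiffy;	/* computed once during time_init() */

	static inline void setup_cputime_one_jiffy(void)
	{
		/* jiffies_to_cputime() may be expensive at run time here
		 * (powerpc with CONFIG_VIRT_CPU_ACCOUNTING); every later
		 * tick just reads the cached value. */
		cputime_one_jiffy = jiffies_to_cputime(1);
	}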
Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-5-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/cputime.h | 1 + arch/powerpc/include/asm/cputime.h | 13 +++++++++++++ arch/powerpc/kernel/time.c | 4 ++++ arch/s390/include/asm/cputime.h | 1 + include/asm-generic/cputime.h | 1 + kernel/itimer.c | 4 ++-- kernel/posix-cpu-timers.c | 6 +++--- kernel/sched.c | 9 ++++----- 8 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index d20b998cb91d..7fa8a8594660 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -30,6 +30,7 @@ typedef u64 cputime_t; typedef u64 cputime64_t; #define cputime_zero ((cputime_t)0) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~((cputime_t)0) >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index f42e623030ee..fa19f3fe05ff 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -18,6 +18,9 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING #include +#ifdef __KERNEL__ +static inline void setup_cputime_one_jiffy(void) { } +#endif #else #include @@ -48,6 +51,11 @@ typedef u64 cputime64_t; #ifdef __KERNEL__ +/* + * One jiffy in timebase units computed during initialization + */ +extern cputime_t cputime_one_jiffy; + /* * Convert cputime <-> jiffies */ @@ -89,6 +97,11 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif) return ct; } +static inline void setup_cputime_one_jiffy(void) +{ + cputime_one_jiffy = jiffies_to_cputime(1); +} + static inline cputime64_t jiffies64_to_cputime64(const u64 jif) { cputime_t ct; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..211d7b0cd370 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -193,6 +193,8 @@ EXPORT_SYMBOL(__cputime_clockt_factor); DEFINE_PER_CPU(unsigned long, cputime_last_delta); DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); +cputime_t cputime_one_jiffy; + static void calc_cputime_factors(void) { struct div_result res; @@ -500,6 +502,7 @@ static int __init iSeries_tb_recal(void) tb_to_xs = divres.result_low; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; vdso_data->tb_to_xs = tb_to_xs; + setup_cputime_one_jiffy(); } else { printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" @@ -945,6 +948,7 @@ void __init time_init(void) tb_ticks_per_usec = ppc_tb_freq / 1000000; tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); calc_cputime_factors(); + setup_cputime_one_jiffy(); /* * Calculate the length of each tick in ns. 
It will not be diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 7a3817a656df..24b1244aadb9 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -42,6 +42,7 @@ __div(unsigned long long n, unsigned int base) #endif /* __s390x__ */ #define cputime_zero (0ULL) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~0UL >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 1c1fa422d18a..ca0f239f0e13 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -7,6 +7,7 @@ typedef unsigned long cputime_t; #define cputime_zero (0UL) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~0UL >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/kernel/itimer.c b/kernel/itimer.c index 21adff7b2a17..8078a32d3b10 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -64,7 +64,7 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, if (cputime_le(cval, t)) /* about to fire */ - cval = jiffies_to_cputime(1); + cval = cputime_one_jiffy; else cval = cputime_sub(cval, t); } @@ -161,7 +161,7 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, if (!cputime_eq(cval, cputime_zero) || !cputime_eq(nval, cputime_zero)) { if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, jiffies_to_cputime(1)); + nval = cputime_add(nval, cputime_one_jiffy); set_process_cpu_timer(tsk, clock_id, &nval, &cval); } it->expires = nval; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 69c92374355f..18bdde6f676f 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1086,7 +1086,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, it->error += it->incr_error; if (it->error >= onecputick) { it->expires = cputime_sub(it->expires, - jiffies_to_cputime(1)); + cputime_one_jiffy); it->error -= onecputick; } } else @@ -1461,7 +1461,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, if (!cputime_eq(*oldval, cputime_zero)) { if (cputime_le(*oldval, now.cpu)) { /* Just about to fire. 
*/ - *oldval = jiffies_to_cputime(1); + *oldval = cputime_one_jiffy; } else { *oldval = cputime_sub(*oldval, now.cpu); } @@ -1712,7 +1712,7 @@ static __init int init_posix_cpu_timers(void) register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); - cputime_to_timespec(jiffies_to_cputime(1), &ts); + cputime_to_timespec(cputime_one_jiffy, &ts); onecputick = ts.tv_nsec; WARN_ON(ts.tv_sec != 0); diff --git a/kernel/sched.c b/kernel/sched.c index 1b59e265273b..8f977d5cc515 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5031,17 +5031,16 @@ void account_idle_time(cputime_t cputime) */ void account_process_tick(struct task_struct *p, int user_tick) { - cputime_t one_jiffy = jiffies_to_cputime(1); - cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy); + cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); if (user_tick) - account_user_time(p, one_jiffy, one_jiffy_scaled); + account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) - account_system_time(p, HARDIRQ_OFFSET, one_jiffy, + account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, one_jiffy_scaled); else - account_idle_time(one_jiffy); + account_idle_time(cputime_one_jiffy); } /* -- cgit v1.2.3 From 8a4c47f346cc7a12d0897c05eb3cc1add26b487f Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:04 +0800 Subject: drm: Remove the unused prefix in DRM_DEBUG_KMS/DRIVER/MODE Currently we have to pass a prefix every time we use the macro definitions of DRM_DEBUG_KMS/DRM_DEBUG_DRIVER/MODE, which is not convenient. We should use DRM_NAME as the default prefix. So remove the prefix argument from the macro definitions of DRM_DEBUG_KMS/DRIVER/MODE. Signed-off-by: Zhao Yakui Acked-by: Ian Romanick Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 8 +++----- drivers/gpu/drm/i915/i915_dma.c | 35 +++++++++++++++-------------------- drivers/gpu/drm/i915/intel_lvds.c | 10 +++------- drivers/gpu/drm/i915/intel_sdvo.c | 35 ++++++++++++++++------------------- include/drm/drmP.h | 18 +++++++++--------- 5 files changed, 46 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index fd489d76fbbc..5eca2d5c5f23 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -40,7 +40,6 @@ #include "drm.h" #include "drm_crtc.h" -#define DRM_MODESET_DEBUG "drm_mode" /** * drm_mode_debug_printmodeline - debug print a mode * @dev: DRM device @@ -53,8 +52,8 @@ */ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) { - DRM_DEBUG_MODE(DRM_MODESET_DEBUG, - "Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x\n", + DRM_DEBUG_MODE("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " + "0x%x 0x%x\n", mode->base.id, mode->name, mode->vrefresh, mode->clock, mode->hdisplay, mode->hsync_start, mode->hsync_end, mode->htotal, @@ -819,8 +818,7 @@ void drm_mode_prune_invalid(struct drm_device *dev, list_del(&mode->head); if (verbose) { drm_mode_debug_printmodeline(mode); - DRM_DEBUG_MODE(DRM_MODESET_DEBUG, - "Not using %s mode %d\n", + DRM_DEBUG_MODE("Not using %s mode %d\n", mode->name, mode->status); } drm_mode_destroy(dev, mode); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8c4783180bf6..14625e146f18 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -33,8 +33,6 @@ #include "i915_drm.h" #include "i915_drv.h" -#define I915_DRV "i915_drv" -
/* Really want an OS-independent resettable timer. Would like to have * this loop run for (eg) 3 sec, but have the timer reset every time * the head pointer changes, so that EBUSY only happens if the ring @@ -101,7 +99,7 @@ static int i915_init_phys_hws(struct drm_device *dev) memset(dev_priv->hw_status_page, 0, PAGE_SIZE); I915_WRITE(HWS_PGA, dev_priv->dma_status_page); - DRM_DEBUG_DRIVER(I915_DRV, "Enabled hardware status page\n"); + DRM_DEBUG_DRIVER("Enabled hardware status page\n"); return 0; } @@ -187,8 +185,7 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init) master_priv->sarea_priv = (drm_i915_sarea_t *) ((u8 *)master_priv->sarea->handle + init->sarea_priv_offset); } else { - DRM_DEBUG_DRIVER(I915_DRV, - "sarea not found assuming DRI2 userspace\n"); + DRM_DEBUG_DRIVER("sarea not found assuming DRI2 userspace\n"); } if (init->ring_size != 0) { @@ -238,7 +235,7 @@ static int i915_dma_resume(struct drm_device * dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - DRM_DEBUG_DRIVER(I915_DRV, "%s\n", __func__); + DRM_DEBUG_DRIVER("%s\n", __func__); if (dev_priv->ring.map.handle == NULL) { DRM_ERROR("can not ioremap virtual address for" @@ -251,14 +248,14 @@ static int i915_dma_resume(struct drm_device * dev) DRM_ERROR("Can not find hardware status page\n"); return -EINVAL; } - DRM_DEBUG_DRIVER(I915_DRV, "hw status page @ %p\n", + DRM_DEBUG_DRIVER("hw status page @ %p\n", dev_priv->hw_status_page); if (dev_priv->status_gfx_addr != 0) I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); else I915_WRITE(HWS_PGA, dev_priv->dma_status_page); - DRM_DEBUG_DRIVER(I915_DRV, "Enabled hardware status page\n"); + DRM_DEBUG_DRIVER("Enabled hardware status page\n"); return 0; } @@ -552,7 +549,7 @@ static int i915_dispatch_flip(struct drm_device * dev) if (!master_priv->sarea_priv) return -EINVAL; - DRM_DEBUG_DRIVER(I915_DRV, "%s: page=%d pfCurrentPage=%d\n", + DRM_DEBUG_DRIVER("%s: page=%d pfCurrentPage=%d\n", __func__, dev_priv->current_page, master_priv->sarea_priv->pf_current_page); @@ -633,8 +630,7 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, return -EINVAL; } - DRM_DEBUG_DRIVER(I915_DRV, - "i915 batchbuffer, start %x used %d cliprects %d\n", + DRM_DEBUG_DRIVER("i915 batchbuffer, start %x used %d cliprects %d\n", batch->start, batch->used, batch->num_cliprects); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -681,8 +677,7 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, void *batch_data; int ret; - DRM_DEBUG_DRIVER(I915_DRV, - "i915 cmdbuffer, buf %p sz %d cliprects %d\n", + DRM_DEBUG_DRIVER("i915 cmdbuffer, buf %p sz %d cliprects %d\n", cmdbuf->buf, cmdbuf->sz, cmdbuf->num_cliprects); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -735,7 +730,7 @@ static int i915_flip_bufs(struct drm_device *dev, void *data, { int ret; - DRM_DEBUG_DRIVER(I915_DRV, "%s\n", __func__); + DRM_DEBUG_DRIVER("%s\n", __func__); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -778,7 +773,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = dev_priv->num_fence_regs - dev_priv->fence_reg_start; break; default: - DRM_DEBUG_DRIVER(I915_DRV, "Unknown parameter %d\n", + DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); return -EINVAL; } @@ -819,7 +814,7 @@ static int i915_setparam(struct drm_device *dev, void *data, dev_priv->fence_reg_start = param->value; break; default: - DRM_DEBUG_DRIVER(I915_DRV, "unknown parameter %d\n", + DRM_DEBUG_DRIVER("unknown parameter %d\n", param->param); return -EINVAL; } @@ -846,7 
+841,7 @@ static int i915_set_status_page(struct drm_device *dev, void *data, return 0; } - DRM_DEBUG("set status page addr 0x%08x\n", (u32)hws->addr); + DRM_DEBUG_DRIVER("set status page addr 0x%08x\n", (u32)hws->addr); dev_priv->status_gfx_addr = hws->addr & (0x1ffff<<12); @@ -868,9 +863,9 @@ static int i915_set_status_page(struct drm_device *dev, void *data, memset(dev_priv->hw_status_page, 0, PAGE_SIZE); I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); - DRM_DEBUG_DRIVER(I915_DRV, "load hws HWS_PGA with gfx mem 0x%x\n", + DRM_DEBUG_DRIVER("load hws HWS_PGA with gfx mem 0x%x\n", dev_priv->status_gfx_addr); - DRM_DEBUG_DRIVER(I915_DRV, "load hws at %p\n", + DRM_DEBUG_DRIVER("load hws at %p\n", dev_priv->hw_status_page); return 0; } @@ -1310,7 +1305,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv) { struct drm_i915_file_private *i915_file_priv; - DRM_DEBUG_DRIVER(I915_DRV, "\n"); + DRM_DEBUG_DRIVER("\n"); i915_file_priv = (struct drm_i915_file_private *) kmalloc(sizeof(*i915_file_priv), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ab38efffecf..b59c65d19d81 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -38,8 +38,6 @@ #include "i915_drv.h" #include -#define I915_LVDS "i915_lvds" - /* * the following four scaling options are defined. * #define DRM_MODE_SCALE_NON_GPU 0 @@ -673,8 +671,7 @@ static int intel_lvds_set_property(struct drm_connector *connector, struct drm_crtc *crtc = connector->encoder->crtc; struct intel_lvds_priv *lvds_priv = intel_output->dev_priv; if (value == DRM_MODE_SCALE_NON_GPU) { - DRM_DEBUG_KMS(I915_LVDS, - "non_GPU property is unsupported\n"); + DRM_DEBUG_KMS("non_GPU property is unsupported\n"); return 0; } if (lvds_priv->fitting_mode == value) { @@ -731,8 +728,7 @@ static const struct drm_encoder_funcs intel_lvds_enc_funcs = { static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id) { - DRM_DEBUG_KMS(I915_LVDS, - "Skipping LVDS initialization for %s\n", id->ident); + DRM_DEBUG_KMS("Skipping LVDS initialization for %s\n", id->ident); return 1; } @@ -1013,7 +1009,7 @@ out: return; failed: - DRM_DEBUG_KMS(I915_LVDS, "No LVDS modes found, disabling.\n"); + DRM_DEBUG_KMS("No LVDS modes found, disabling.\n"); if (intel_output->ddc_bus) intel_i2c_destroy(intel_output->ddc_bus); drm_connector_cleanup(connector); diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 4f0c30948bc4..abef69c8a49a 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -36,7 +36,6 @@ #include "intel_sdvo_regs.h" #undef SDVO_DEBUG -#define I915_SDVO "i915_sdvo" struct intel_sdvo_priv { u8 slave_addr; @@ -178,7 +177,7 @@ static bool intel_sdvo_read_byte(struct intel_output *intel_output, u8 addr, return true; } - DRM_DEBUG("i2c transfer returned %d\n", ret); + DRM_DEBUG_KMS("i2c transfer returned %d\n", ret); return false; } @@ -288,7 +287,7 @@ static void intel_sdvo_debug_write(struct intel_output *intel_output, u8 cmd, struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; int i; - DRM_DEBUG_KMS(I915_SDVO, "%s: W: %02X ", + DRM_DEBUG_KMS("%s: W: %02X ", SDVO_NAME(sdvo_priv), cmd); for (i = 0; i < args_len; i++) DRM_LOG_KMS("%02X ", ((u8 *)args)[i]); @@ -341,7 +340,7 @@ static void intel_sdvo_debug_response(struct intel_output *intel_output, struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; int i; - DRM_DEBUG_KMS(I915_SDVO, "%s: R: ", SDVO_NAME(sdvo_priv)); + 
DRM_DEBUG_KMS("%s: R: ", SDVO_NAME(sdvo_priv)); for (i = 0; i < response_len; i++) DRM_LOG_KMS("%02X ", ((u8 *)response)[i]); for (; i < 8; i++) @@ -658,10 +657,10 @@ static int intel_sdvo_get_clock_rate_mult(struct intel_output *intel_output) status = intel_sdvo_read_response(intel_output, &response, 1); if (status != SDVO_CMD_STATUS_SUCCESS) { - DRM_DEBUG("Couldn't get SDVO clock rate multiplier\n"); + DRM_DEBUG_KMS("Couldn't get SDVO clock rate multiplier\n"); return SDVO_CLOCK_RATE_MULT_1X; } else { - DRM_DEBUG("Current clock rate multiplier: %d\n", response); + DRM_DEBUG_KMS("Current clock rate multiplier: %d\n", response); } return response; @@ -942,14 +941,14 @@ static void intel_sdvo_set_tv_format(struct intel_output *output) format = &sdvo_priv->tv_format; memset(&unset, 0, sizeof(unset)); if (memcmp(format, &unset, sizeof(*format))) { - DRM_DEBUG("%s: Choosing default TV format of NTSC-M\n", + DRM_DEBUG_KMS("%s: Choosing default TV format of NTSC-M\n", SDVO_NAME(sdvo_priv)); format->ntsc_m = 1; intel_sdvo_write_cmd(output, SDVO_CMD_SET_TV_FORMAT, format, sizeof(*format)); status = intel_sdvo_read_response(output, NULL, 0); if (status != SDVO_CMD_STATUS_SUCCESS) - DRM_DEBUG("%s: Failed to set TV format\n", + DRM_DEBUG_KMS("%s: Failed to set TV format\n", SDVO_NAME(sdvo_priv)); } } @@ -1220,8 +1219,8 @@ static void intel_sdvo_dpms(struct drm_encoder *encoder, int mode) * a given it the status is a success, we succeeded. */ if (status == SDVO_CMD_STATUS_SUCCESS && !input1) { - DRM_DEBUG("First %s output reported failure to sync\n", - SDVO_NAME(sdvo_priv)); + DRM_DEBUG_KMS("First %s output reported failure to " + "sync\n", SDVO_NAME(sdvo_priv)); } if (0) @@ -1316,8 +1315,8 @@ static void intel_sdvo_restore(struct drm_connector *connector) intel_wait_for_vblank(dev); status = intel_sdvo_get_trained_inputs(intel_output, &input1, &input2); if (status == SDVO_CMD_STATUS_SUCCESS && !input1) - DRM_DEBUG("First %s output reported failure to sync\n", - SDVO_NAME(sdvo_priv)); + DRM_DEBUG_KMS("First %s output reported failure to " + "sync\n", SDVO_NAME(sdvo_priv)); } intel_sdvo_set_active_outputs(intel_output, sdvo_priv->save_active_outputs); @@ -1395,7 +1394,7 @@ int intel_sdvo_supports_hotplug(struct drm_connector *connector) u8 response[2]; u8 status; struct intel_output *intel_output; - DRM_DEBUG("\n"); + DRM_DEBUG_KMS("\n"); if (!connector) return 0; @@ -1460,7 +1459,7 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_ATTACHED_DISPLAYS, NULL, 0); status = intel_sdvo_read_response(intel_output, &response, 2); - DRM_DEBUG("SDVO response %d %d\n", response[0], response[1]); + DRM_DEBUG_KMS("SDVO response %d %d\n", response[0], response[1]); if (status != SDVO_CMD_STATUS_SUCCESS) return connector_status_unknown; @@ -1905,8 +1904,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) /* Read the regs to test if we can talk to the device */ for (i = 0; i < 0x40; i++) { if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) { - DRM_DEBUG_KMS(I915_SDVO, - "No SDVO device found on SDVO%c\n", + DRM_DEBUG_KMS("No SDVO device found on SDVO%c\n", output_device == SDVOB ? 
'B' : 'C'); goto err_i2c; } } @@ -1989,8 +1987,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) sdvo_priv->controlled_output = 0; memcpy (bytes, &sdvo_priv->caps.output_flags, 2); - DRM_DEBUG_KMS(I915_SDVO, - "%s: Unknown SDVO output type (0x%02x%02x)\n", + DRM_DEBUG_KMS("%s: Unknown SDVO output type (0x%02x%02x)\n", SDVO_NAME(sdvo_priv), bytes[0], bytes[1]); encoder_type = DRM_MODE_ENCODER_NONE; @@ -2022,7 +2019,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) &sdvo_priv->pixel_clock_max); - DRM_DEBUG_KMS(I915_SDVO, "%s device VID/DID: %02X:%02X.%02X, " + DRM_DEBUG_KMS("%s device VID/DID: %02X:%02X.%02X, " "clock range %dMHz - %dMHz, " "input 1: %c, input 2: %c, " "output 1: %c, output 2: %c\n", diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 45b67d9c39c1..edbdb02a7a3f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -174,19 +174,19 @@ extern void drm_ut_debug_printk(unsigned int request_level, __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_DRIVER(prefix, fmt, args...) \ +#define DRM_DEBUG_DRIVER(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_DRIVER, prefix, \ + drm_ut_debug_printk(DRM_UT_DRIVER, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_KMS(prefix, fmt, args...) \ +#define DRM_DEBUG_KMS(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_KMS, prefix, \ + drm_ut_debug_printk(DRM_UT_KMS, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_MODE(prefix, fmt, args...) \ +#define DRM_DEBUG_MODE(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_MODE, prefix, \ + drm_ut_debug_printk(DRM_UT_MODE, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) #define DRM_LOG(fmt, args...) \ do { \ drm_ut_debug_printk(DRM_UT_CORE, NULL, \ @@ -210,9 +210,9 @@ extern void drm_ut_debug_printk(unsigned int request_level, NULL, fmt, ##args); \ } while (0) #else -#define DRM_DEBUG_DRIVER(prefix, fmt, args...) do { } while (0) -#define DRM_DEBUG_KMS(prefix, fmt, args...) do { } while (0) -#define DRM_DEBUG_MODE(prefix, fmt, args...) do { } while (0) +#define DRM_DEBUG_DRIVER(fmt, args...) do { } while (0) +#define DRM_DEBUG_KMS(fmt, args...) do { } while (0) +#define DRM_DEBUG_MODE(fmt, args...) do { } while (0) #define DRM_DEBUG(fmt, arg...) do { } while (0) #define DRM_LOG(fmt, arg...) do { } while (0) #define DRM_LOG_KMS(fmt, args...) do { } while (0) -- cgit v1.2.3 From f940f37f022f7392ab81a35516222cbd46110b42 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:05 +0800 Subject: drm: Remove the macro definition of DRM_DEBUG_MODE Two macro definitions, DRM_DEBUG_KMS and DRM_DEBUG_MODE, can be used to add debug info related to KMS, which is confusing. So remove the macro definition of DRM_DEBUG_MODE; its uses can be replaced by DRM_DEBUG_KMS.
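At call sites the net effect of these patches is that the prefix argument disappears; a usage sketch (the strings are taken from the diffs above, the boot options from the debug-level explanation added in the next commit):

	/* before: DRM_DEBUG_KMS(I915_LVDS, "No LVDS modes found, disabling.\n"); */
	DRM_DEBUG_KMS("No LVDS modes found, disabling.\n");
	DRM_DEBUG_DRIVER("Enabled hardware status page\n");
	/* output is gated by the drm.debug bit mask at boot, e.g.
	 * drm.debug=0x02 (driver), drm.debug=0x04 (kms), 0x06 for both */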
Signed-off-by: Zhao Yakui Acked-by: Ian Romanick Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 4 ++-- include/drm/drmP.h | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 5eca2d5c5f23..6b4d2dc3cdd9 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -52,7 +52,7 @@ */ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) { - DRM_DEBUG_MODE("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " + DRM_DEBUG_KMS("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " "0x%x 0x%x\n", mode->base.id, mode->name, mode->vrefresh, mode->clock, mode->hdisplay, mode->hsync_start, @@ -818,7 +818,7 @@ void drm_mode_prune_invalid(struct drm_device *dev, list_del(&mode->head); if (verbose) { drm_mode_debug_printmodeline(mode); - DRM_DEBUG_MODE("Not using %s mode %d\n", + DRM_DEBUG_KMS("Not using %s mode %d\n", mode->name, mode->status); } drm_mode_destroy(dev, mode); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index edbdb02a7a3f..6513d16cd029 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -88,7 +88,6 @@ struct drm_device; #define DRM_UT_CORE 0x01 #define DRM_UT_DRIVER 0x02 #define DRM_UT_KMS 0x04 -#define DRM_UT_MODE 0x08 extern void drm_ut_debug_printk(unsigned int request_level, const char *prefix, @@ -184,11 +183,6 @@ extern void drm_ut_debug_printk(unsigned int request_level, drm_ut_debug_printk(DRM_UT_KMS, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_MODE(fmt, args...) \ - do { \ - drm_ut_debug_printk(DRM_UT_MODE, DRM_NAME, \ - __func__, fmt, ##args); \ - } while (0) #define DRM_LOG(fmt, args...) \ do { \ drm_ut_debug_printk(DRM_UT_CORE, NULL, \ @@ -212,7 +206,6 @@ extern void drm_ut_debug_printk(unsigned int request_level, #else #define DRM_DEBUG_DRIVER(fmt, args...) do { } while (0) #define DRM_DEBUG_KMS(fmt, args...) do { } while (0) -#define DRM_DEBUG_MODE(fmt, args...) do { } while (0) #define DRM_DEBUG(fmt, arg...) do { } while (0) #define DRM_LOG(fmt, arg...) do { } while (0) #define DRM_LOG_KMS(fmt, args...) do { } while (0) -- cgit v1.2.3 From 87fdff81cd2d770f0adc742e21eb5e062ad20def Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:06 +0800 Subject: DRM: Add the explanation about DRM debug levels Add an explanation of the DRM debug levels to the drmP header file, describing how and where to use each debug level. Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- include/drm/drmP.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 6513d16cd029..e0f1c1fee58b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -88,6 +88,37 @@ struct drm_device; #define DRM_UT_CORE 0x01 #define DRM_UT_DRIVER 0x02 #define DRM_UT_KMS 0x04 +/* + * Three debug levels are defined. + * drm_core, drm_driver, drm_kms + * drm_core level can be used in the generic drm code. For example: + * drm_ioctl, drm_mm, drm_memory + * The macro definiton of DRM_DEBUG is used. + * DRM_DEBUG(fmt, args...) + * The debug info by using the DRM_DEBUG can be obtained by adding + * the boot option of "drm.debug=1". + * + * drm_driver level can be used in the specific drm driver. It is used + * to add the debug info related with the drm driver. For example: + * i915_drv, i915_dma, i915_gem, radeon_drv, + * The macro definition of DRM_DEBUG_DRIVER can be used. + * DRM_DEBUG_DRIVER(fmt, args...)
+ * The debug info by using the DRM_DEBUG_DRIVER can be obtained by + * adding the boot option of "drm.debug=0x02" + * + * drm_kms level can be used in the KMS code related with specific drm driver. + * It is used to add the debug info related with KMS mode. For example: + * the connector/crtc , + * The macro definition of DRM_DEBUG_KMS can be used. + * DRM_DEBUG_KMS(fmt, args...) + * The debug info by using the DRM_DEBUG_KMS can be obtained by + * adding the boot option of "drm.debug=0x04" + * + * If we add the boot option of "drm.debug=0x06", we can get the debug info by + * using the DRM_DEBUG_KMS and DRM_DEBUG_DRIVER. + * If we add the boot option of "drm.debug=0x05", we can get the debug info by + * using the DRM_DEBUG_KMS and DRM_DEBUG. + */ extern void drm_ut_debug_printk(unsigned int request_level, const char *prefix, -- cgit v1.2.3 From 2066facca4c7dfe9f5068ece0200a4dbf10f49e1 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:17 +0200 Subject: drm/kms: slave encoder interface. Define some helper functions to make easier to detach a KMS encoder implementation from the drm module of the GPU it's used in. This is mainly useful for some external I2C encoders known to be present on cards with GPUs from several different manufacturers. Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_encoder_slave.c | 116 ++++++++++++++++++++++++++ include/drm/drm_encoder_slave.h | 162 ++++++++++++++++++++++++++++++++++++ 3 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/drm_encoder_slave.c create mode 100644 include/drm/drm_encoder_slave.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index fe23f29f7cba..5f0aec4f082a 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -11,7 +11,7 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \ drm_crtc.o drm_crtc_helper.o drm_modes.o drm_edid.o \ - drm_info.o drm_debugfs.o + drm_info.o drm_debugfs.o drm_encoder_slave.o drm-$(CONFIG_COMPAT) += drm_ioc32.o diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c new file mode 100644 index 000000000000..6ffd600ccfae --- /dev/null +++ b/drivers/gpu/drm/drm_encoder_slave.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2009 Francisco Jerez. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include + +/** + * drm_i2c_encoder_init - Initialize an I2C slave encoder + * @dev: DRM device. + * @encoder: Encoder to be attached to the I2C device. You aren't + * required to have called drm_encoder_init() before. + * @adap: I2C adapter that will be used to communicate with + * the device. + * @info: Information that will be used to create the I2C device. + * Required fields are @addr and @type. + * + * Create an I2C device on the specified bus (the module containing its + * driver is transparently loaded) and attach it to the specified + * &drm_encoder_slave. The @slave_funcs field will be initialized with + * the hooks provided by the slave driver. + * + * Returns 0 on success or a negative errno on failure, in particular, + * -ENODEV is returned when no matching driver is found. + */ +int drm_i2c_encoder_init(struct drm_device *dev, + struct drm_encoder_slave *encoder, + struct i2c_adapter *adap, + const struct i2c_board_info *info) +{ + char modalias[sizeof(I2C_MODULE_PREFIX) + + I2C_NAME_SIZE]; + struct module *module = NULL; + struct i2c_client *client; + struct drm_i2c_encoder_driver *encoder_drv; + int err = 0; + + snprintf(modalias, sizeof(modalias), + "%s%s", I2C_MODULE_PREFIX, info->type); + request_module(modalias); + + client = i2c_new_device(adap, info); + if (!client) { + err = -ENOMEM; + goto fail; + } + + if (!client->driver) { + err = -ENODEV; + goto fail_unregister; + } + + module = client->driver->driver.owner; + if (!try_module_get(module)) { + err = -ENODEV; + goto fail_unregister; + } + + encoder->bus_priv = client; + + encoder_drv = to_drm_i2c_encoder_driver(client->driver); + + err = encoder_drv->encoder_init(client, dev, encoder); + if (err) + goto fail_unregister; + + return 0; + +fail_unregister: + i2c_unregister_device(client); + module_put(module); +fail: + return err; +} +EXPORT_SYMBOL(drm_i2c_encoder_init); + +/** + * drm_i2c_encoder_destroy - Unregister the I2C device backing an encoder + * @drm_encoder: Encoder to be unregistered. + * + * This should be called from the @destroy method of an I2C slave + * encoder driver once I2C access is no longer needed. + */ +void drm_i2c_encoder_destroy(struct drm_encoder *drm_encoder) +{ + struct drm_encoder_slave *encoder = to_encoder_slave(drm_encoder); + struct i2c_client *client = drm_i2c_encoder_get_client(drm_encoder); + struct module *module = client->driver->driver.owner; + + i2c_unregister_device(client); + encoder->bus_priv = NULL; + + module_put(module); +} +EXPORT_SYMBOL(drm_i2c_encoder_destroy); diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h new file mode 100644 index 000000000000..821ec40c17d8 --- /dev/null +++ b/include/drm/drm_encoder_slave.h @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2009 Francisco Jerez. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DRM_ENCODER_SLAVE_H__ +#define __DRM_ENCODER_SLAVE_H__ + +#include +#include + +/** + * struct drm_encoder_slave_funcs - Entry points exposed by a slave encoder driver + * @set_config: Initialize any encoder-specific modesetting parameters. + * The meaning of the @params parameter is implementation + * dependent. It will usually be a structure with DVO port + * data format settings or timings. It's not required for + * the new parameters to take effect until the next mode + * is set. + * + * Most of its members are analogous to the function pointers in + * &drm_encoder_helper_funcs and they can optionally be used to + * initialize the latter. Connector-like methods (e.g. @get_modes and + * @set_property) will typically be wrapped around and only be called + * if the encoder is the currently selected one for the connector. + */ +struct drm_encoder_slave_funcs { + void (*set_config)(struct drm_encoder *encoder, + void *params); + + void (*destroy)(struct drm_encoder *encoder); + void (*dpms)(struct drm_encoder *encoder, int mode); + void (*save)(struct drm_encoder *encoder); + void (*restore)(struct drm_encoder *encoder); + bool (*mode_fixup)(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + int (*mode_valid)(struct drm_encoder *encoder, + struct drm_display_mode *mode); + void (*mode_set)(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + + enum drm_connector_status (*detect)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*get_modes)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*create_resources)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*set_property)(struct drm_encoder *encoder, + struct drm_connector *connector, + struct drm_property *property, + uint64_t val); + +}; + +/** + * struct drm_encoder_slave - Slave encoder struct + * @base: DRM encoder object. + * @slave_funcs: Slave encoder callbacks. + * @slave_priv: Slave encoder private data. + * @bus_priv: Bus specific data. + * + * A &drm_encoder_slave has two sets of callbacks, @slave_funcs and the + * ones in @base. 
The former are never actually called by the common + * CRTC code, it's just a convenience for splitting the encoder + * functions in an upper, GPU-specific layer and a (hopefully) + * GPU-agnostic lower layer: It's the GPU driver responsibility to + * call the slave methods when appropriate. + * + * drm_i2c_encoder_init() provides a way to get an implementation of + * this. + */ +struct drm_encoder_slave { + struct drm_encoder base; + + struct drm_encoder_slave_funcs *slave_funcs; + void *slave_priv; + void *bus_priv; +}; +#define to_encoder_slave(x) container_of((x), struct drm_encoder_slave, base) + +int drm_i2c_encoder_init(struct drm_device *dev, + struct drm_encoder_slave *encoder, + struct i2c_adapter *adap, + const struct i2c_board_info *info); + + +/** + * struct drm_i2c_encoder_driver + * + * Describes a device driver for an encoder connected to the GPU + * through an I2C bus. In addition to the entry points in @i2c_driver + * an @encoder_init function should be provided. It will be called to + * give the driver an opportunity to allocate any per-encoder data + * structures and to initialize the @slave_funcs and (optionally) + * @slave_priv members of @encoder. + */ +struct drm_i2c_encoder_driver { + struct i2c_driver i2c_driver; + + int (*encoder_init)(struct i2c_client *client, + struct drm_device *dev, + struct drm_encoder_slave *encoder); + +}; +#define to_drm_i2c_encoder_driver(x) container_of((x), \ + struct drm_i2c_encoder_driver, \ + i2c_driver) + +/** + * drm_i2c_encoder_get_client - Get the I2C client corresponding to an encoder + */ +static inline struct i2c_client *drm_i2c_encoder_get_client(struct drm_encoder *encoder) +{ + return (struct i2c_client *)to_encoder_slave(encoder)->bus_priv; +} + +/** + * drm_i2c_encoder_register - Register an I2C encoder driver + * @owner: Module containing the driver. + * @driver: Driver to be registered. + */ +static inline int drm_i2c_encoder_register(struct module *owner, + struct drm_i2c_encoder_driver *driver) +{ + return i2c_register_driver(owner, &driver->i2c_driver); +} + +/** + * drm_i2c_encoder_unregister - Unregister an I2C encoder driver + * @driver: Driver to be unregistered. + */ +static inline void drm_i2c_encoder_unregister(struct drm_i2c_encoder_driver *driver) +{ + return i2c_del_driver(&driver->i2c_driver); +} + +void drm_i2c_encoder_destroy(struct drm_encoder *encoder); + +#endif -- cgit v1.2.3 From 74bd3c26b90f39b9dcc05c471333da8998572b5d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:18 +0200 Subject: drm: Define DRM_MODE_CONNECTOR_TV The existing TV connector types are often unsuitable either because there is no way to probe them until they're actually plugged in or because they can change during run time (e.g. 7-pin DIN connectors that behave as S-Video, Component, Composite or SCART depending on the adaptor plugged in). 
Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 1 + drivers/gpu/drm/drm_sysfs.c | 3 +++ include/drm/drm_mode.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 9c758305472c..c7ab80b45e3f 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -146,6 +146,7 @@ static struct drm_conn_prop_enum_list drm_connector_enum_list[] = { DRM_MODE_CONNECTOR_DisplayPort, "DisplayPort", 0 }, { DRM_MODE_CONNECTOR_HDMIA, "HDMI Type A", 0 }, { DRM_MODE_CONNECTOR_HDMIB, "HDMI Type B", 0 }, + { DRM_MODE_CONNECTOR_TV, "TV", 0 }, }; static struct drm_prop_enum_list drm_encoder_enum_list[] = diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 85ec31b3ff00..adc179459c25 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -247,6 +247,7 @@ static ssize_t subconnector_show(struct device *device, case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: prop = dev->mode_config.tv_subconnector_property; is_tv = 1; break; @@ -287,6 +288,7 @@ static ssize_t select_subconnector_show(struct device *device, case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: prop = dev->mode_config.tv_select_subconnector_property; is_tv = 1; break; @@ -385,6 +387,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector) case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: for (i = 0; i < ARRAY_SIZE(connector_attrs_opt1); i++) { ret = device_create_file(&connector->kdev, &connector_attrs_opt1[i]); if (ret) diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index ae304cc73c90..c51e9f528c8f 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -155,6 +155,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_CONNECTOR_DisplayPort 10 #define DRM_MODE_CONNECTOR_HDMIA 11 #define DRM_MODE_CONNECTOR_HDMIB 12 +#define DRM_MODE_CONNECTOR_TV 13 struct drm_mode_get_connector { -- cgit v1.2.3 From aeaa1ad3ff32be833680e484d99ec29d892da1ff Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:19 +0200 Subject: drm: Define DRM_MODE_SUBCONNECTOR_SCART Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 2 ++ include/drm/drm_mode.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index c7ab80b45e3f..ed53c5c37ac4 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -108,6 +108,7 @@ static struct drm_prop_enum_list drm_tv_select_enum_list[] = { DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SVIDEO, "SVIDEO" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */ + { DRM_MODE_SUBCONNECTOR_SCART, "SCART" }, /* TV-out */ }; DRM_ENUM_NAME_FN(drm_get_tv_select_name, drm_tv_select_enum_list) @@ -118,6 +119,7 @@ static struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = { DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SVIDEO, "SVIDEO" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */ + { DRM_MODE_SUBCONNECTOR_SCART, "SCART" }, /* TV-out */ }; DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, diff --git 
a/include/drm/drm_mode.h b/include/drm/drm_mode.h index c51e9f528c8f..616aeb42b773 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -141,6 +141,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_SUBCONNECTOR_Composite 5 #define DRM_MODE_SUBCONNECTOR_SVIDEO 6 #define DRM_MODE_SUBCONNECTOR_Component 8 +#define DRM_MODE_SUBCONNECTOR_SCART 9 #define DRM_MODE_CONNECTOR_Unknown 0 #define DRM_MODE_CONNECTOR_VGA 1 -- cgit v1.2.3 From b6b7902e54c7e8abbc213d8bdc290350c00ccfe5 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:20 +0200 Subject: drm: Define some new standard TV properties. Namely "brightness", "contrast" and "flicker reduction". Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 18 ++++++++++++++++++ include/drm/drm_crtc.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index ed53c5c37ac4..a8c831134fc3 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -718,6 +718,24 @@ int drm_mode_create_tv_properties(struct drm_device *dev, int num_modes, drm_property_add_enum(dev->mode_config.tv_mode_property, i, i, modes[i]); + dev->mode_config.tv_brightness_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "brightness", 2); + dev->mode_config.tv_brightness_property->values[0] = 0; + dev->mode_config.tv_brightness_property->values[1] = 100; + + dev->mode_config.tv_contrast_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "contrast", 2); + dev->mode_config.tv_contrast_property->values[0] = 0; + dev->mode_config.tv_contrast_property->values[1] = 100; + + dev->mode_config.tv_flicker_reduction_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "flicker reduction", 2); + dev->mode_config.tv_flicker_reduction_property->values[0] = 0; + dev->mode_config.tv_flicker_reduction_property->values[1] = 100; + return 0; } EXPORT_SYMBOL(drm_mode_create_tv_properties); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 125994d8ac0b..5f2cc0ca4c7d 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -572,6 +572,9 @@ struct drm_mode_config { struct drm_property *tv_right_margin_property; struct drm_property *tv_top_margin_property; struct drm_property *tv_bottom_margin_property; + struct drm_property *tv_brightness_property; + struct drm_property *tv_contrast_property; + struct drm_property *tv_flicker_reduction_property; /* Optional properties */ struct drm_property *scaling_mode_property; -- cgit v1.2.3 From fa56d4cb4022c8b313c3b99236e1b87effc3655b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:29:11 +0000 Subject: ide: allow ide_dev_read_id() to be called from the IRQ context * Un-static __ide_wait_stat(). * Allow ide_dev_read_id() helper to be called from the IRQ context by adding irq_ctx flag and using mdelay()/__ide_wait_stat() when needed. * Switch ide_driveid_update() to set irq_ctx flag. This change is needed for the consecutive patch which fixes races in handling of user-space SET XFER commands but for improved bisectability and clarity it is better to do it in a separate patch. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-iops.c | 6 +++--- drivers/ide/ide-probe.c | 31 +++++++++++++++++++++---------- include/linux/ide.h | 3 ++- 3 files changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 2892b242bbe1..b99873845d21 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -102,8 +102,8 @@ EXPORT_SYMBOL(ide_fixstring); * setting a timer to wake up at half second intervals thereafter, * until timeout is achieved, before timing out. */ -static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, - unsigned long timeout, u8 *rstat) +int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, + unsigned long timeout, u8 *rstat) { ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; @@ -316,7 +316,7 @@ int ide_driveid_update(ide_drive_t *drive) return 0; SELECT_MASK(drive, 1); - rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id); + rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id, 1); SELECT_MASK(drive, 0); if (rc) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1bb106f6221a..8de442cbee94 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -238,6 +238,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) * @drive: drive to identify * @cmd: command to use * @id: buffer for IDENTIFY data + * @irq_ctx: flag set when called from the IRQ context * * Sends an ATA(PI) IDENTIFY request to a drive and waits for a response. * @@ -246,7 +247,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) * 2 device aborted the command (refused to identify itself) */ -int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) +int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id, int irq_ctx) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; @@ -263,7 +264,10 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); /* take a deep breath */ - msleep(50); + if (irq_ctx) + mdelay(50); + else + msleep(50); if (io_ports->ctl_addr && (hwif->host_flags & IDE_HFLAG_BROKEN_ALTSTATUS) == 0) { @@ -295,12 +299,19 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) timeout = ((cmd == ATA_CMD_ID_ATA) ? 
WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - if (ide_busy_sleep(drive, timeout, use_altstatus)) - return 1; - /* wait for IRQ and ATA_DRQ */ - msleep(50); - s = tp_ops->read_status(hwif); + if (irq_ctx) { + rc = __ide_wait_stat(drive, ATA_DRQ, BAD_R_STAT, timeout, &s); + if (rc) + return 1; + } else { + rc = ide_busy_sleep(drive, timeout, use_altstatus); + if (rc) + return 1; + + msleep(50); + s = tp_ops->read_status(hwif); + } if (OK_STAT(s, ATA_DRQ, BAD_R_STAT)) { /* drive returned ID */ @@ -406,10 +417,10 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (OK_STAT(stat, ATA_DRDY, ATA_BUSY) || present || cmd == ATA_CMD_ID_ATAPI) { - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); if (rc) /* failed: try again */ - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); stat = tp_ops->read_status(hwif); @@ -424,7 +435,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) msleep(50); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); (void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0); - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); } /* ensure drive IRQ is clear */ diff --git a/include/linux/ide.h b/include/linux/ide.h index edc93a6d931d..cb6cd0459a5e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1081,6 +1081,7 @@ extern void ide_fixstring(u8 *, const int, const int); int ide_busy_sleep(ide_drive_t *, unsigned long, int); +int __ide_wait_stat(ide_drive_t *, u8, u8, unsigned long, u8 *); int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); ide_startstop_t ide_do_park_unpark(ide_drive_t *, struct request *); @@ -1169,7 +1170,7 @@ int ide_no_data_taskfile(ide_drive_t *, struct ide_cmd *); int ide_taskfile_ioctl(ide_drive_t *, unsigned long); -int ide_dev_read_id(ide_drive_t *, u8, u16 *); +int ide_dev_read_id(ide_drive_t *, u8, u16 *, int); extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); -- cgit v1.2.3 From 665d66e8fad60a5a162c4615f27f916ad1a6d567 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:35:51 +0000 Subject: ide: fix races in handling of user-space SET XFER commands * Make cmd->tf_flags field 'u16' and add IDE_TFLAG_SET_XFER taskfile flag. * Update ide_finish_cmd() to set xfer / re-read id if the new flag is set. * Convert set_xfer_rate() (write handler for /proc/ide/hd?/current_speed) and ide_cmd_ioctl() (HDIO_DRIVE_CMD ioctl handler) to use the new flag. * Remove no longer needed disable_irq_nosync() + enable_irq() from ide_config_drive_speed(). Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-ioctls.c | 8 ++------ drivers/ide/ide-iops.c | 10 ---------- drivers/ide/ide-proc.c | 10 ++-------- drivers/ide/ide-taskfile.c | 9 ++++++++- include/linux/ide.h | 3 ++- 5 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index e246d3d3fbcc..d3440b5010a5 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -167,6 +167,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) err = -EINVAL; goto abort; } + + cmd.tf_flags |= IDE_TFLAG_SET_XFER; } err = ide_raw_taskfile(drive, &cmd, buf, args[3]); @@ -174,12 +176,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) args[0] = tf->status; args[1] = tf->error; args[2] = tf->nsect; - - if (!err && xfer_rate) { - /* active-retuning-calls future */ - ide_set_xfer_rate(drive, xfer_rate); - ide_driveid_update(drive); - } abort: if (copy_to_user((void __user *)arg, &args, 4)) err = -EFAULT; diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index b99873845d21..b14fa9a87c49 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -363,14 +363,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) * this point (lost interrupt). */ - /* - * FIXME: we race against the running IRQ here if - * this is called from non IRQ context. If we use - * disable_irq() we hang on the error path. Work - * is needed. - */ - disable_irq_nosync(hwif->irq); - udelay(1); tp_ops->dev_select(drive); SELECT_MASK(drive, 1); @@ -394,8 +386,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) SELECT_MASK(drive, 0); - enable_irq(hwif->irq); - if (error) { (void) ide_dump_status(drive, "set_drive_speed_status", stat); return error; diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 3242698832a4..021de41655e6 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -195,7 +195,6 @@ ide_devset_get(xfer_rate, current_speed); static int set_xfer_rate (ide_drive_t *drive, int arg) { struct ide_cmd cmd; - int err; if (arg < XFER_PIO_0 || arg > XFER_UDMA_6) return -EINVAL; @@ -206,14 +205,9 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) cmd.tf.nsect = (u8)arg; cmd.valid.out.tf = IDE_VALID_FEATURE | IDE_VALID_NSECT; cmd.valid.in.tf = IDE_VALID_NSECT; + cmd.tf_flags = IDE_TFLAG_SET_XFER; - err = ide_no_data_taskfile(drive, &cmd); - - if (!err) { - ide_set_xfer_rate(drive, (u8) arg); - ide_driveid_update(drive); - } - return err; + return ide_no_data_taskfile(drive, &cmd); } ide_devset_rw(current_speed, xfer_rate); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 50336d51eebc..cc8633cbe133 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -324,10 +324,17 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat) { struct request *rq = drive->hwif->rq; - u8 err = ide_read_error(drive); + u8 err = ide_read_error(drive), nsect = cmd->tf.nsect; + u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER); ide_complete_cmd(drive, cmd, stat, err); rq->errors = err; + + if (err == 0 && set_xfer) { + ide_set_xfer_rate(drive, nsect); + ide_driveid_update(drive); + } + ide_complete_rq(drive, err ? 
-EIO : 0, blk_rq_bytes(rq)); } diff --git a/include/linux/ide.h b/include/linux/ide.h index cb6cd0459a5e..803c1ae31237 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -258,6 +258,7 @@ enum { IDE_TFLAG_DYN = (1 << 5), IDE_TFLAG_FS = (1 << 6), IDE_TFLAG_MULTI_PIO = (1 << 7), + IDE_TFLAG_SET_XFER = (1 << 8), }; enum { @@ -294,7 +295,7 @@ struct ide_cmd { } out, in; } valid; - u8 tf_flags; + u16 tf_flags; u8 ftf_flags; /* for TASKFILE ioctl */ int protocol; -- cgit v1.2.3 From a75f0236292a5fca65f26efedca48bd07db1834d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 12 Aug 2009 02:30:10 +0200 Subject: drm: Add more standard TV properties. Overscan, saturation, hue. Used in the nouveau driver for GPUs with integrated TV encoders. Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 18 ++++++++++++++++++ include/drm/drm_crtc.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index a8c831134fc3..362a538cdedc 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -736,6 +736,24 @@ int drm_mode_create_tv_properties(struct drm_device *dev, int num_modes, dev->mode_config.tv_flicker_reduction_property->values[0] = 0; dev->mode_config.tv_flicker_reduction_property->values[1] = 100; + dev->mode_config.tv_overscan_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "overscan", 2); + dev->mode_config.tv_overscan_property->values[0] = 0; + dev->mode_config.tv_overscan_property->values[1] = 100; + + dev->mode_config.tv_saturation_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "saturation", 2); + dev->mode_config.tv_saturation_property->values[0] = 0; + dev->mode_config.tv_saturation_property->values[1] = 100; + + dev->mode_config.tv_hue_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "hue", 2); + dev->mode_config.tv_hue_property->values[0] = 0; + dev->mode_config.tv_hue_property->values[1] = 100; + return 0; } EXPORT_SYMBOL(drm_mode_create_tv_properties); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 5f2cc0ca4c7d..db92a83f8ca9 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -575,6 +575,9 @@ struct drm_mode_config { struct drm_property *tv_brightness_property; struct drm_property *tv_contrast_property; struct drm_property *tv_flicker_reduction_property; + struct drm_property *tv_overscan_property; + struct drm_property *tv_saturation_property; + struct drm_property *tv_hue_property; /* Optional properties */ struct drm_property *scaling_mode_property; -- cgit v1.2.3 From 00ae4064b1445524752575dd84df227c0687c99d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: rename 4k first chunk allocator to page Page size isn't always 4k depending on arch and configuration. Rename 4k first chunk allocator to page. Signed-off-by: Tejun Heo Cc: David Howells --- Documentation/kernel-parameters.txt | 2 +- arch/x86/kernel/setup_percpu.c | 23 ++++++++++++----------- include/linux/percpu.h | 2 +- mm/percpu.c | 25 ++++++++++++++----------- 4 files changed, 28 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7936b801fe6a..12e9eb77ee0d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1920,7 +1920,7 @@ and is between 256 and 4096 characters. 
It is defined in the file See arch/parisc/kernel/pdc_chassis.c percpu_alloc= [X86] Select which percpu first chunk allocator to use. - Allowed values are one of "lpage", "embed" and "4k". + Allowed values are one of "lpage", "embed" and "page". See comments in arch/x86/kernel/setup_percpu.c for details on each allocator. This parameter is primarily for debugging and performance comparison. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a26ff61e2fb0..1e17711c29d6 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -249,21 +249,22 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k allocator + * Page allocator * - * Boring fallback 4k allocator. This allocator puts more pressure on - * PTE TLBs but other than that behaves nicely on both UMA and NUMA. + * Boring fallback 4k page allocator. This allocator puts more + * pressure on PTE TLBs but other than that behaves nicely on both UMA + * and NUMA. */ -static void __init pcpu4k_populate_pte(unsigned long addr) +static void __init pcpup_populate_pte(unsigned long addr) { populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_4k(size_t static_size) +static ssize_t __init setup_pcpu_page(size_t static_size) { - return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - pcpu_fc_alloc, pcpu_fc_free, - pcpu4k_populate_pte); + return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpup_populate_pte); } /* for explicit first chunk allocator selection */ @@ -307,7 +308,7 @@ void __init setup_per_cpu_areas(void) */ ret = -EINVAL; if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "4k")) { + if (strcmp(pcpu_chosen_alloc, "page")) { if (!strcmp(pcpu_chosen_alloc, "lpage")) ret = setup_pcpu_lpage(static_size, true); else if (!strcmp(pcpu_chosen_alloc, "embed")) @@ -317,7 +318,7 @@ void __init setup_per_cpu_areas(void) "specified\n", pcpu_chosen_alloc); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " - "falling back to 4k\n", + "falling back to page size\n", pcpu_chosen_alloc, ret); } } else { @@ -326,7 +327,7 @@ void __init setup_per_cpu_areas(void) ret = setup_pcpu_embed(static_size, false); } if (ret < 0) - ret = setup_pcpu_4k(static_size); + ret = setup_pcpu_page(static_size); if (ret < 0) panic("cannot allocate static percpu area (%zu bytes, err=%zd)", static_size, ret); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e134c8229631..7989f61b03f3 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -74,7 +74,7 @@ extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); -extern ssize_t __init pcpu_4k_first_chunk( +extern ssize_t __init pcpu_page_first_chunk( size_t static_size, size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, diff --git a/mm/percpu.c b/mm/percpu.c index cbddcbdab681..6feac7934904 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1497,15 +1497,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, } /** - * pcpu_4k_first_chunk - map the first chunk using PAGE_SIZE pages + * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE * @free_fn: funtion to free percpu page, always called with PAGE_SIZE * 
@populate_pte_fn: function to populate pte * - * This is a helper to ease setting up embedded first percpu chunk and - * can be called where pcpu_setup_first_chunk() is expected. + * This is a helper to ease setting up page-remapped first percpu + * chunk and can be called where pcpu_setup_first_chunk() is expected. * * This is the basic allocator. Static percpu area is allocated * page-by-page into vmalloc area. @@ -1514,12 +1514,13 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +ssize_t __init pcpu_page_first_chunk(size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; @@ -1527,6 +1528,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, int i, j; ssize_t ret; + snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); + unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, PCPU_MIN_UNIT_SIZE)); @@ -1542,8 +1545,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, ptr = alloc_fn(cpu, PAGE_SIZE); if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); + pr_warning("PERCPU: failed to allocate %s page " + "for cpu%u\n", psize_str, cpu); goto enomem; } pages[j++] = virt_to_page(ptr); @@ -1580,8 +1583,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, } /* we're ready, commit */ - pr_info("PERCPU: %d 4k pages/cpu @%p s%zu r%zu\n", - unit_pages, vm.addr, static_size, reserved_size); + pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu\n", + unit_pages, psize_str, vm.addr, static_size, reserved_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, unit_pages << PAGE_SHIFT, vm.addr, NULL); -- cgit v1.2.3 From 08fc45806103e59a37418e84719b878f9bb32540 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: build first chunk allocators selectively There's no need to build unused first chunk allocators in. Define CONFIG_NEED_PER_CPU_*_FIRST_CHUNK and let archs enable them selectively. 
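For illustration only (not part of this patch), the effect of the new
config symbols can be mimicked in a freestanding C program: an allocator
is compiled in, and considered by the fallback logic, only when its
NEED_* macro is defined. All macro and function names below are made up
for the sketch.

  #include <stdio.h>

  #define NEED_EMBED_FIRST_CHUNK 1	/* arch "selects" embed */
  /* NEED_PAGE_FIRST_CHUNK left undefined: page allocator not built */

  #ifdef NEED_EMBED_FIRST_CHUNK
  static long embed_first_chunk(void)
  {
  	return 4096;	/* pretend unit size */
  }
  #endif

  int main(void)
  {
  	long ret = -22;	/* -EINVAL until an allocator succeeds */

  #ifdef NEED_EMBED_FIRST_CHUNK
  	ret = embed_first_chunk();
  #endif
  #ifdef NEED_PAGE_FIRST_CHUNK
  	if (ret < 0)
  		ret = page_first_chunk();
  #endif
  	printf("first chunk setup: %ld\n", ret);
  	return ret < 0;
  }

Archs that never select a symbol pay no text or data cost for the
corresponding allocator, which is the whole point of the change.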
Signed-off-by: Tejun Heo --- arch/x86/Kconfig | 10 ++++++++++ include/linux/percpu.h | 27 +++++---------------------- mm/percpu.c | 19 +++++++++++-------- 3 files changed, 26 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e06b2eeff9f2..f7ac27215512 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -150,6 +150,16 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_LPAGE_FIRST_CHUNK + def_bool y + depends on NEED_MULTIPLE_NODES + config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 7989f61b03f3..e26788e0da4a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -70,17 +70,21 @@ extern size_t __init pcpu_setup_first_chunk( ssize_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map); +#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); +#endif +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern ssize_t __init pcpu_page_first_chunk( size_t static_size, size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); +#endif -#ifdef CONFIG_NEED_MULTIPLE_NODES +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern int __init pcpu_lpage_build_unit_map( size_t static_size, size_t reserved_size, ssize_t *dyn_sizep, size_t *unit_sizep, @@ -98,27 +102,6 @@ extern ssize_t __init pcpu_lpage_first_chunk( extern void *pcpu_lpage_remapped(void *kaddr); #else -static inline int pcpu_lpage_build_unit_map( - size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) -{ - return -EINVAL; -} - -static inline ssize_t __init pcpu_lpage_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) -{ - return -EINVAL; -} - static inline void *pcpu_lpage_remapped(void *kaddr) { return NULL; diff --git a/mm/percpu.c b/mm/percpu.c index 6feac7934904..7971997de310 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1414,8 +1414,9 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, return pcpu_unit_size; } -static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep) +static inline size_t pcpu_calc_fc_sizes(size_t static_size, + size_t reserved_size, + ssize_t *dyn_sizep) { size_t size_sum; @@ -1427,6 +1428,8 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, return size_sum; } +#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ + !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @static_size: the size of static percpu area in bytes @@ -1495,7 +1498,10 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, unit_size, base, NULL); } +#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || + !CONFIG_HAVE_SETUP_PER_CPU_AREA */ +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE 
pages * @static_size: the size of static percpu area in bytes @@ -1598,12 +1604,9 @@ out_free_ar: free_bootmem(__pa(pages), pages_size); return ret; } +#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ -/* - * Large page remapping first chunk setup helper - */ -#ifdef CONFIG_NEED_MULTIPLE_NODES - +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** * pcpu_lpage_build_unit_map - build unit_map for large page remapping * @static_size: the size of static percpu area in bytes @@ -1982,7 +1985,7 @@ void *pcpu_lpage_remapped(void *kaddr) return NULL; } -#endif +#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */ /* * Generic percpu area setup. -- cgit v1.2.3 From f58dc01ba2ca9fe3ab2ba4ca43d9c8a735cf62d8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: generalize first chunk allocator selection Now that all first chunk allocators are in mm/percpu.c, it makes sense to make generalize percpu_alloc kernel parameter. Define PCPU_FC_* and set pcpu_chosen_fc using early_param() in mm/percpu.c. Arch code can use the set value to determine which first chunk allocator to use. Signed-off-by: Tejun Heo --- Documentation/kernel-parameters.txt | 11 ++++++----- arch/x86/kernel/setup_percpu.c | 24 ++++++------------------ include/linux/percpu.h | 12 ++++++++++++ mm/percpu.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 12e9eb77ee0d..dee9ce2e6cfa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1919,11 +1919,12 @@ and is between 256 and 4096 characters. It is defined in the file Format: { 0 | 1 } See arch/parisc/kernel/pdc_chassis.c - percpu_alloc= [X86] Select which percpu first chunk allocator to use. - Allowed values are one of "lpage", "embed" and "page". - See comments in arch/x86/kernel/setup_percpu.c for - details on each allocator. This parameter is primarily - for debugging and performance comparison. + percpu_alloc= Select which percpu first chunk allocator to use. + Currently supported values are "embed", "page" and + "lpage". Archs may support subset or none of the + selections. See comments in mm/percpu.c for details + on each allocator. This parameter is primarily for + debugging and performance comparison. pf. [PARIDE] See Documentation/blockdev/paride.txt. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1e17711c29d6..b961d99e6416 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -267,16 +267,6 @@ static ssize_t __init setup_pcpu_page(size_t static_size) pcpup_populate_pte); } -/* for explicit first chunk allocator selection */ -static char pcpu_chosen_alloc[16] __initdata; - -static int __init percpu_alloc_setup(char *str) -{ - strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); - return 0; -} -early_param("percpu_alloc", percpu_alloc_setup); - static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -307,19 +297,17 @@ void __init setup_per_cpu_areas(void) * each allocator for details. 
*/ ret = -EINVAL; - if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "page")) { - if (!strcmp(pcpu_chosen_alloc, "lpage")) + if (pcpu_chosen_fc != PCPU_FC_AUTO) { + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + if (pcpu_chosen_fc == PCPU_FC_LPAGE) ret = setup_pcpu_lpage(static_size, true); - else if (!strcmp(pcpu_chosen_alloc, "embed")) - ret = setup_pcpu_embed(static_size, true); else - pr_warning("PERCPU: unknown allocator %s " - "specified\n", pcpu_chosen_alloc); + ret = setup_pcpu_embed(static_size, true); + if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " "falling back to page size\n", - pcpu_chosen_alloc, ret); + pcpu_fc_names[pcpu_chosen_fc], ret); } } else { ret = setup_pcpu_lpage(static_size, false); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e26788e0da4a..9be05cbe5ee0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,6 +59,18 @@ extern void *pcpu_base_addr; extern const int *pcpu_unit_map; +enum pcpu_fc { + PCPU_FC_AUTO, + PCPU_FC_EMBED, + PCPU_FC_PAGE, + PCPU_FC_LPAGE, + + PCPU_FC_NR, +}; +extern const char *pcpu_fc_names[PCPU_FC_NR]; + +extern enum pcpu_fc pcpu_chosen_fc; + typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); diff --git a/mm/percpu.c b/mm/percpu.c index 7971997de310..7fb40bb1555a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1414,6 +1414,38 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, return pcpu_unit_size; } +const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { + [PCPU_FC_AUTO] = "auto", + [PCPU_FC_EMBED] = "embed", + [PCPU_FC_PAGE] = "page", + [PCPU_FC_LPAGE] = "lpage", +}; + +enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; + +static int __init percpu_alloc_setup(char *str) +{ + if (0) + /* nada */; +#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK + else if (!strcmp(str, "embed")) + pcpu_chosen_fc = PCPU_FC_EMBED; +#endif +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK + else if (!strcmp(str, "page")) + pcpu_chosen_fc = PCPU_FC_PAGE; +#endif +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK + else if (!strcmp(str, "lpage")) + pcpu_chosen_fc = PCPU_FC_LPAGE; +#endif + else + pr_warning("PERCPU: unknown allocator %s specified\n", str); + + return 0; +} +early_param("percpu_alloc", percpu_alloc_setup); + static inline size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, ssize_t *dyn_sizep) -- cgit v1.2.3 From 9a7737691e90d3cce0e5248f91826c50e5aa3fcf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: drop @static_size from first chunk allocators First chunk allocators assume percpu areas have been linked using one of PERCPU_*() macros and depend on __per_cpu_load symbol defined by those macros, so there isn't much point in passing in static area size explicitly when it can be easily calculated from __per_cpu_start and __per_cpu_end. Drop @static_size from all percpu first chunk allocators and helpers. 
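The point of the change is that the static size is derivable, so passing
it around is redundant. A standalone sketch of the idiom the allocators
now use internally, with an ordinary array standing in for the
linker-provided __per_cpu_start/__per_cpu_end boundary symbols:

  #include <stdio.h>

  /* stand-ins for the boundary symbols emitted by the PERCPU_*() macros */
  static char percpu_section[8192];
  #define __per_cpu_start (&percpu_section[0])
  #define __per_cpu_end   (&percpu_section[sizeof(percpu_section)])

  int main(void)
  {
  	/* the same expression each allocator now evaluates for itself */
  	const size_t static_size = __per_cpu_end - __per_cpu_start;

  	printf("static percpu area: %zu bytes\n", static_size);
  	return 0;
  }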
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 34 +++++++++++++++------------------- include/linux/percpu.h | 18 ++++++++---------- mm/percpu.c | 29 +++++++++++++---------------- 3 files changed, 36 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b961d99e6416..8aad486c688f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,7 +157,7 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; @@ -184,8 +184,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -ENOMEM; } - ret = pcpu_lpage_build_unit_map(static_size, - PERCPU_FIRST_CHUNK_RESERVE, + ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, &dyn_size, &unit_size, PMD_SIZE, unit_map, pcpu_lpage_cpu_distance); if (ret < 0) { @@ -208,9 +207,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) } } - ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - dyn_size, unit_size, PMD_SIZE, - unit_map, nr_units, + ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + unit_size, PMD_SIZE, unit_map, nr_units, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: if (ret < 0) @@ -218,7 +216,7 @@ out_free: return ret; } #else -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -232,7 +230,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. 
*/ -static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -244,7 +242,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) if (!chosen && (!cpu_has_pse || pcpu_need_numa())) return -EINVAL; - return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, reserve - PERCPU_FIRST_CHUNK_RESERVE); } @@ -260,9 +258,9 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(size_t static_size) +static ssize_t __init setup_pcpu_page(void) { - return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, pcpup_populate_pte); } @@ -282,7 +280,6 @@ static inline void setup_percpu_segment(int cpu) void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; unsigned int cpu; unsigned long delta; size_t pcpu_unit_size; @@ -300,9 +297,9 @@ void __init setup_per_cpu_areas(void) if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(static_size, true); + ret = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(static_size, true); + ret = setup_pcpu_embed(true); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " @@ -310,15 +307,14 @@ void __init setup_per_cpu_areas(void) pcpu_fc_names[pcpu_chosen_fc], ret); } } else { - ret = setup_pcpu_lpage(static_size, false); + ret = setup_pcpu_lpage(false); if (ret < 0) - ret = setup_pcpu_embed(static_size, false); + ret = setup_pcpu_embed(false); } if (ret < 0) - ret = setup_pcpu_page(static_size); + ret = setup_pcpu_page(); if (ret < 0) - panic("cannot allocate static percpu area (%zu bytes, err=%zd)", - static_size, ret); + panic("cannot initialize percpu area (err=%zd)", ret); pcpu_unit_size = ret; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9be05cbe5ee0..be2fc8fb9b6f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -84,13 +84,12 @@ extern size_t __init pcpu_setup_first_chunk( #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( - size_t static_size, size_t reserved_size, - ssize_t dyn_size); + size_t reserved_size, ssize_t dyn_size); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern ssize_t __init pcpu_page_first_chunk( - size_t static_size, size_t reserved_size, + size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); @@ -98,16 +97,15 @@ extern ssize_t __init pcpu_page_first_chunk( #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern int __init pcpu_lpage_build_unit_map( - size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, + size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); extern ssize_t __init pcpu_lpage_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, 
pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 7fb40bb1555a..e2ac58a39bb2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1464,7 +1464,6 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @@ -1489,9 +1488,9 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size) +ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { + const size_t static_size = __per_cpu_end - __per_cpu_start; size_t size_sum, unit_size, chunk_size; void *base; unsigned int cpu; @@ -1536,7 +1535,6 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE * @free_fn: funtion to free percpu page, always called with PAGE_SIZE @@ -1552,12 +1550,13 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_page_first_chunk(size_t static_size, size_t reserved_size, +ssize_t __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + const size_t static_size = __per_cpu_end - __per_cpu_start; char psize_str[16]; int unit_pages; size_t pages_size; @@ -1641,7 +1640,6 @@ out_free_ar: #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** * pcpu_lpage_build_unit_map - build unit_map for large page remapping - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_sizep: in/out parameter for dynamic size, -1 for auto * @unit_sizep: out parameter for unit size @@ -1661,13 +1659,14 @@ out_free_ar: * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and * returns the number of units to be allocated. -errno on failure. 
*/ -int __init pcpu_lpage_build_unit_map(size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, +int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; + const size_t static_size = __per_cpu_end - __per_cpu_start; int group_cnt_max = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ @@ -1819,7 +1818,6 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes * @unit_size: unit size in bytes @@ -1850,15 +1848,15 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, +ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; + const size_t static_size = __per_cpu_end - __per_cpu_start; size_t chunk_size = unit_size * nr_units; size_t map_size; unsigned int cpu; @@ -2037,7 +2035,6 @@ EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; ssize_t unit_size; unsigned long delta; unsigned int cpu; @@ -2046,7 +2043,7 @@ void __init setup_per_cpu_areas(void) * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, + unit_size = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE); if (unit_size < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From 1d9d32572163b30be81dbe1409dfa7ea9763d0e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: make @dyn_size mandatory for pcpu_setup_first_chunk() Now that all actual first chunk allocation and copying happen in the first chunk allocators and helpers, there's no reason for pcpu_setup_first_chunk() to try to determine @dyn_size automatically. The only left user is page first chunk allocator. Make it determine dyn_size like other allocators and make @dyn_size mandatory for pcpu_setup_first_chunk(). 
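To see what "determine dyn_size like other allocators" means, here is a
standalone rendering of the pcpu_calc_fc_sizes() convention used in the
diff below: dyn_size == -1 requests auto sizing, and the auto value
absorbs whatever padding page alignment adds. The numbers in main() are
made up.

  #include <stdio.h>

  #define PAGE_SIZE	4096UL
  #define PFN_ALIGN(x)	(((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

  static unsigned long calc_fc_sizes(unsigned long static_size,
  				   unsigned long reserved_size,
  				   long *dyn_sizep)
  {
  	unsigned long size_sum;

  	size_sum = PFN_ALIGN(static_size + reserved_size +
  			     (*dyn_sizep >= 0 ? (unsigned long)*dyn_sizep : 0));
  	if (*dyn_sizep != 0)
  		*dyn_sizep = size_sum - static_size - reserved_size;
  	return size_sum;
  }

  int main(void)
  {
  	long dyn = -1;	/* auto */
  	unsigned long sum = calc_fc_sizes(5000, 8192, &dyn);

  	/* prints: size_sum=16384 dyn=3192 */
  	printf("size_sum=%lu dyn=%ld\n", sum, dyn);
  	return 0;
  }

pcpu_setup_first_chunk() then receives the already-resolved value and no
longer needs a -1 special case of its own.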
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- mm/percpu.c | 39 +++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index be2fc8fb9b6f..0cfdd14d096a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -79,7 +79,7 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t unit_size, + size_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK diff --git a/mm/percpu.c b/mm/percpu.c index e2ac58a39bb2..287f59cc5fb9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1235,7 +1235,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes, 0 for none - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE * @base_addr: mapped address * @unit_map: cpu -> unit map, NULL for sequential mapping @@ -1252,10 +1252,9 @@ EXPORT_SYMBOL_GPL(free_percpu); * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * - * @dyn_size, if non-negative, determines the number of bytes - * available for dynamic allocation in the first chunk. Specifying - * non-negative value makes percpu leave alone the area beyond - * @static_size + @reserved_size + @dyn_size. + * @dyn_size determines the number of bytes available for dynamic + * allocation in the first chunk. The area between @static_size + + * @reserved_size + @dyn_size and @unit_size is unused. * * @unit_size specifies unit size and must be aligned to PAGE_SIZE and * equal to or larger than @static_size + @reserved_size + if @@ -1276,13 +1275,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * percpu access. */ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t unit_size, + size_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map) { static struct vm_struct first_vm; static int smap[2], dmap[2]; - size_t size_sum = static_size + reserved_size + - (dyn_size >= 0 ? 
dyn_size : 0); + size_t size_sum = static_size + reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu, tcpu; int i; @@ -1345,9 +1343,6 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); - if (dyn_size < 0) - dyn_size = pcpu_unit_size - static_size - reserved_size; - first_vm.flags = VM_ALLOC; first_vm.size = pcpu_chunk_size; first_vm.addr = base_addr; @@ -1557,6 +1552,8 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, { static struct vm_struct vm; const size_t static_size = __per_cpu_end - __per_cpu_start; + ssize_t dyn_size = -1; + size_t size_sum, unit_size; char psize_str[16]; int unit_pages; size_t pages_size; @@ -1567,8 +1564,9 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); - unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, - PCPU_MIN_UNIT_SIZE)); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + unit_pages = unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); @@ -1591,12 +1589,12 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * unit_pages << PAGE_SHIFT; + vm.size = nr_cpu_ids * unit_size; vm_area_register_early(&vm, PAGE_SIZE); for_each_possible_cpu(cpu) { - unsigned long unit_addr = (unsigned long)vm.addr + - (cpu * unit_pages << PAGE_SHIFT); + unsigned long unit_addr = + (unsigned long)vm.addr + cpu * unit_size; for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); @@ -1620,11 +1618,12 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, } /* we're ready, commit */ - pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu\n", - unit_pages, psize_str, vm.addr, static_size, reserved_size); + pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", + unit_pages, psize_str, vm.addr, static_size, reserved_size, + dyn_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, - unit_pages << PAGE_SHIFT, vm.addr, NULL); + ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + unit_size, vm.addr, NULL); goto out_free_ar; enomem: -- cgit v1.2.3 From 3cbc85652767c38b252c8de55f9fd180b29e4c0d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: add @align to pcpu_fc_alloc_fn_t pcpu_fc_alloc_fn_t is about to see more interesting usage, add @align parameter. 
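A minimal userspace analogue of the new callback shape, with C11
aligned_alloc() standing in for the bootmem allocator: under the old
(cpu, size) signature the alignment was implicitly equal to the size, so
a page-sized allocation with only, say, 64-byte alignment could not be
expressed.

  #include <stdio.h>
  #include <stdlib.h>

  /* mirrors the updated pcpu_fc_alloc_fn_t */
  typedef void *(*fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align);

  static void *demo_fc_alloc(unsigned int cpu, size_t size, size_t align)
  {
  	(void)cpu;	/* a real callback would allocate node-locally */
  	return aligned_alloc(align, size);	/* size must be a multiple of align */
  }

  int main(void)
  {
  	fc_alloc_fn_t alloc_fn = demo_fc_alloc;
  	void *p = alloc_fn(0, 4096, 64);	/* align < size: new with this patch */

  	printf("allocated at %p\n", p);
  	free(p);
  	return 0;
  }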
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 ++-- include/linux/percpu.h | 3 ++- mm/percpu.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8aad486c688f..660cde133141 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -126,9 +126,9 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, /* * Helpers for first chunk memory allocation */ -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return pcpu_alloc_bootmem(cpu, size, size); + return pcpu_alloc_bootmem(cpu, size, align); } static void __init pcpu_fc_free(void *ptr, size_t size) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 0cfdd14d096a..d385dbcf190b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -71,7 +71,8 @@ extern const char *pcpu_fc_names[PCPU_FC_NR]; extern enum pcpu_fc pcpu_chosen_fc; -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, + size_t align); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); diff --git a/mm/percpu.c b/mm/percpu.c index 287f59cc5fb9..3316e3aac7ee 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1578,7 +1578,7 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, for (i = 0; i < unit_pages; i++) { void *ptr; - ptr = alloc_fn(cpu, PAGE_SIZE); + ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); if (!ptr) { pr_warning("PERCPU: failed to allocate %s page " "for cpu%u\n", psize_str, cpu); @@ -1888,7 +1888,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, goto found; continue; found: - ptr = alloc_fn(cpu, lpage_size); + ptr = alloc_fn(cpu, lpage_size, lpage_size); if (!ptr) { pr_warning("PERCPU: failed to allocate large page " "for cpu%u\n", cpu); -- cgit v1.2.3 From 033e48fb82958053113178264ddb9d5038d5e38b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward Unit map handling will be generalized and extended and used for embedding sparse first chunk and other purposes. Relocate two unit_map related functions upward in preparation. This patch just moves the code without any actual change. 
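The code being moved is worth a second look, since later patches
generalize it. The heart of pcpu_lpage_build_unit_map() is its proximity
grouping pass; a compressed standalone version of that loop, with a
made-up two-node distance table:

  #include <stdio.h>

  #define NR_CPUS		4
  #define LOCAL_DISTANCE	10

  /* toy topology: cpus {0,1} and {2,3} are node-local pairs */
  static int cpu_distance(int a, int b)
  {
  	return (a / 2 == b / 2) ? LOCAL_DISTANCE : 2 * LOCAL_DISTANCE;
  }

  int main(void)
  {
  	int group_map[NR_CPUS] = { 0 };
  	int cpu, tcpu, group;

  	/* same shape as the grouping loop being relocated: place each cpu
  	 * in the first group all of whose members are LOCAL_DISTANCE away
  	 * in both directions */
  	for (cpu = 0; cpu < NR_CPUS; cpu++) {
  		group = 0;
  next_group:
  		for (tcpu = 0; tcpu < cpu; tcpu++) {
  			if (group_map[tcpu] == group &&
  			    (cpu_distance(cpu, tcpu) > LOCAL_DISTANCE ||
  			     cpu_distance(tcpu, cpu) > LOCAL_DISTANCE)) {
  				group++;
  				goto next_group;
  			}
  		}
  		group_map[cpu] = group;
  	}

  	for (cpu = 0; cpu < NR_CPUS; cpu++)
  		printf("cpu%d -> group %d\n", cpu, group_map[cpu]);	/* 0 0 1 1 */
  	return 0;
  }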
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 14 +- mm/percpu.c | 339 +++++++++++++++++++++++++------------------------ 2 files changed, 180 insertions(+), 173 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d385dbcf190b..570fb18de2ba 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -78,6 +78,14 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK +extern int __init pcpu_lpage_build_unit_map( + size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn); +#endif + extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, size_t dyn_size, size_t unit_size, @@ -97,12 +105,6 @@ extern ssize_t __init pcpu_page_first_chunk( #endif #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_build_unit_map( - size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn); - extern ssize_t __init pcpu_lpage_first_chunk( size_t reserved_size, size_t dyn_size, size_t unit_size, size_t lpage_size, diff --git a/mm/percpu.c b/mm/percpu.c index 3316e3aac7ee..2b9c4b2a2fc0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1231,6 +1231,178 @@ void free_percpu(void *ptr) } EXPORT_SYMBOL_GPL(free_percpu); +static inline size_t pcpu_calc_fc_sizes(size_t static_size, + size_t reserved_size, + ssize_t *dyn_sizep) +{ + size_t size_sum; + + size_sum = PFN_ALIGN(static_size + reserved_size + + (*dyn_sizep >= 0 ? *dyn_sizep : 0)); + if (*dyn_sizep != 0) + *dyn_sizep = size_sum - static_size - reserved_size; + + return size_sum; +} + +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK +/** + * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_sizep: in/out parameter for dynamic size, -1 for auto + * @unit_sizep: out parameter for unit size + * @unit_map: unit_map to be filled + * @cpu_distance_fn: callback to determine distance between cpus + * + * This function builds cpu -> unit map and determine other parameters + * considering needed percpu size, large page size and distances + * between CPUs in NUMA. + * + * CPUs which are of LOCAL_DISTANCE both ways are grouped together and + * may share units in the same large page. The returned configuration + * is guaranteed to have CPUs on different nodes on different large + * pages and >=75% usage of allocated virtual address space. + * + * RETURNS: + * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and + * returns the number of units to be allocated. -errno on failure. 
+ */ +int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + static int group_map[NR_CPUS] __initdata; + static int group_cnt[NR_CPUS] __initdata; + const size_t static_size = __per_cpu_end - __per_cpu_start; + int group_cnt_max = 0; + size_t size_sum, min_unit_size, alloc_size; + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ + int last_allocs; + unsigned int cpu, tcpu; + int group, unit; + + /* + * Determine min_unit_size, alloc_size and max_upa such that + * alloc_size is multiple of lpage_size and is the smallest + * which can accomodate 4k aligned segments which are equal to + * or larger than min_unit_size. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + + alloc_size = roundup(min_unit_size, lpage_size); + upa = alloc_size / min_unit_size; + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + upa--; + max_upa = upa; + + /* group cpus according to their proximity */ + for_each_possible_cpu(cpu) { + group = 0; + next_group: + for_each_possible_cpu(tcpu) { + if (cpu == tcpu) + break; + if (group_map[tcpu] == group && + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { + group++; + goto next_group; + } + } + group_map[cpu] = group; + group_cnt[group]++; + group_cnt_max = max(group_cnt_max, group_cnt[group]); + } + + /* + * Expand unit size until address space usage goes over 75% + * and then as much as possible without using more address + * space. + */ + last_allocs = INT_MAX; + for (upa = max_upa; upa; upa--) { + int allocs = 0, wasted = 0; + + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + continue; + + for (group = 0; group_cnt[group]; group++) { + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); + allocs += this_allocs; + wasted += this_allocs * upa - group_cnt[group]; + } + + /* + * Don't accept if wastage is over 25%. The + * greater-than comparison ensures upa==1 always + * passes the following check. 
+ */ + if (wasted > num_possible_cpus() / 3) + continue; + + /* and then don't consume more memory */ + if (allocs > last_allocs) + break; + last_allocs = allocs; + best_upa = upa; + } + *unit_sizep = alloc_size / best_upa; + + /* assign units to cpus accordingly */ + unit = 0; + for (group = 0; group_cnt[group]; group++) { + for_each_possible_cpu(cpu) + if (group_map[cpu] == group) + unit_map[cpu] = unit++; + unit = roundup(unit, best_upa); + } + + return unit; /* unit contains aligned number of units */ +} + +static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, + unsigned int *cpup); + +static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units) +{ + int width = 1, v = nr_units; + char empty_str[] = "--------"; + int upl, lpl; /* units per lpage, lpage per line */ + unsigned int cpu; + int lpage, unit; + + while (v /= 10) + width++; + empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + + upl = max_t(int, lpage_size / unit_size, 1); + lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + + printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, + static_size, reserved_size, dyn_size, unit_size, lpage_size); + + for (lpage = 0, unit = 0; unit < nr_units; unit++) { + if (!(unit % upl)) { + if (!(lpage++ % lpl)) { + printk("\n"); + printk("%spcpu-lpage: ", lvl); + } else + printk("| "); + } + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + printk("%0*d ", width, cpu); + else + printk("%s ", empty_str); + } + printk("\n"); +} +#endif + /** * pcpu_setup_first_chunk - initialize the first percpu chunk * @static_size: the size of static percpu area in bytes @@ -1441,20 +1613,6 @@ static int __init percpu_alloc_setup(char *str) } early_param("percpu_alloc", percpu_alloc_setup); -static inline size_t pcpu_calc_fc_sizes(size_t static_size, - size_t reserved_size, - ssize_t *dyn_sizep) -{ - size_t size_sum; - - size_sum = PFN_ALIGN(static_size + reserved_size + - (*dyn_sizep >= 0 ? *dyn_sizep : 0)); - if (*dyn_sizep != 0) - *dyn_sizep = size_sum - static_size - reserved_size; - - return size_sum; -} - #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** @@ -1637,122 +1795,6 @@ out_free_ar: #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -/** - * pcpu_lpage_build_unit_map - build unit_map for large page remapping - * @reserved_size: the size of reserved percpu area in bytes - * @dyn_sizep: in/out parameter for dynamic size, -1 for auto - * @unit_sizep: out parameter for unit size - * @unit_map: unit_map to be filled - * @cpu_distance_fn: callback to determine distance between cpus - * - * This function builds cpu -> unit map and determine other parameters - * considering needed percpu size, large page size and distances - * between CPUs in NUMA. - * - * CPUs which are of LOCAL_DISTANCE both ways are grouped together and - * may share units in the same large page. The returned configuration - * is guaranteed to have CPUs on different nodes on different large - * pages and >=75% usage of allocated virtual address space. - * - * RETURNS: - * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and - * returns the number of units to be allocated. -errno on failure. 
- */ -int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) -{ - static int group_map[NR_CPUS] __initdata; - static int group_cnt[NR_CPUS] __initdata; - const size_t static_size = __per_cpu_end - __per_cpu_start; - int group_cnt_max = 0; - size_t size_sum, min_unit_size, alloc_size; - int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ - int last_allocs; - unsigned int cpu, tcpu; - int group, unit; - - /* - * Determine min_unit_size, alloc_size and max_upa such that - * alloc_size is multiple of lpage_size and is the smallest - * which can accomodate 4k aligned segments which are equal to - * or larger than min_unit_size. - */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); - min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - - alloc_size = roundup(min_unit_size, lpage_size); - upa = alloc_size / min_unit_size; - while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) - upa--; - max_upa = upa; - - /* group cpus according to their proximity */ - for_each_possible_cpu(cpu) { - group = 0; - next_group: - for_each_possible_cpu(tcpu) { - if (cpu == tcpu) - break; - if (group_map[tcpu] == group && - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { - group++; - goto next_group; - } - } - group_map[cpu] = group; - group_cnt[group]++; - group_cnt_max = max(group_cnt_max, group_cnt[group]); - } - - /* - * Expand unit size until address space usage goes over 75% - * and then as much as possible without using more address - * space. - */ - last_allocs = INT_MAX; - for (upa = max_upa; upa; upa--) { - int allocs = 0, wasted = 0; - - if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) - continue; - - for (group = 0; group_cnt[group]; group++) { - int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); - allocs += this_allocs; - wasted += this_allocs * upa - group_cnt[group]; - } - - /* - * Don't accept if wastage is over 25%. The - * greater-than comparison ensures upa==1 always - * passes the following check. 
- */ - if (wasted > num_possible_cpus() / 3) - continue; - - /* and then don't consume more memory */ - if (allocs > last_allocs) - break; - last_allocs = allocs; - best_upa = upa; - } - *unit_sizep = alloc_size / best_upa; - - /* assign units to cpus accordingly */ - unit = 0; - for (group = 0; group_cnt[group]; group++) { - for_each_possible_cpu(cpu) - if (group_map[cpu] == group) - unit_map[cpu] = unit++; - unit = roundup(unit, best_upa); - } - - return unit; /* unit contains aligned number of units */ -} - struct pcpul_ent { void *ptr; void *map_addr; @@ -1778,43 +1820,6 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, return false; } -static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units) -{ - int width = 1, v = nr_units; - char empty_str[] = "--------"; - int upl, lpl; /* units per lpage, lpage per line */ - unsigned int cpu; - int lpage, unit; - - while (v /= 10) - width++; - empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; - - upl = max_t(int, lpage_size / unit_size, 1); - lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); - - printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, - static_size, reserved_size, dyn_size, unit_size, lpage_size); - - for (lpage = 0, unit = 0; unit < nr_units; unit++) { - if (!(unit % upl)) { - if (!(lpage++ % lpl)) { - printk("\n"); - printk("%spcpu-lpage: ", lvl); - } else - printk("| "); - } - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) - printk("%0*d ", width, cpu); - else - printk("%s ", empty_str); - } - printk("\n"); -} - /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page * @reserved_size: the size of reserved percpu area in bytes -- cgit v1.2.3 From fd1e8a1fe2b54df6c185b4fa65f181f50b9c4d4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: introduce pcpu_alloc_info and pcpu_group_info Till now, non-linear cpu->unit map was expressed using an integer array which maps each cpu to a unit and used only by lpage allocator. Although how many units have been placed in a single contiguos area (group) is known while building unit_map, the information is lost when the result is recorded into the unit_map array. For lpage allocator, as all allocations are done by lpages and whether two adjacent lpages are in the same group or not is irrelevant, this didn't cause any problem. Non-linear cpu->unit mapping will be used for sparse embedding and this grouping information is necessary for that. This patch introduces pcpu_alloc_info which contains all the information necessary for initializing percpu allocator. pcpu_alloc_info contains array of pcpu_group_info which describes how units are grouped and mapped to cpus. pcpu_group_info also has base_offset field to specify its offset from the chunk's base address. pcpu_build_alloc_info() initializes this field as if all groups are allocated back-to-back as is currently done but this will be used to sparsely place groups. pcpu_alloc_info is a rather complex data structure which contains a flexible array which in turn points to nested cpu_map arrays. * pcpu_alloc_alloc_info() and pcpu_free_alloc_info() are provided to help dealing with pcpu_alloc_info. * pcpu_lpage_build_unit_map() is updated to build pcpu_alloc_info, generalized and renamed to pcpu_build_alloc_info(). 
@cpu_distance_fn may be NULL indicating that all cpus are of LOCAL_DISTANCE. * pcpul_lpage_dump_cfg() is updated to process pcpu_alloc_info, generalized and renamed to pcpu_dump_alloc_info(). It now also prints which group each alloc unit belongs to. * pcpu_setup_first_chunk() now takes pcpu_alloc_info instead of the separate parameters. All first chunk allocators are updated to use pcpu_build_alloc_info() to build alloc_info and call pcpu_setup_first_chunk() with it. This has the side effect of packing units for sparse possible cpus. ie. if cpus 0, 2 and 4 are possible, they'll be assigned unit 0, 1 and 2 instead of 0, 2 and 4. * x86 setup_pcpu_lpage() is updated to deal with alloc_info. * sparc64 setup_per_cpu_areas() is updated to build alloc_info. Although the changes made by this patch are pretty pervasive, it doesn't cause any behavior difference other than packing of sparse cpus. It mostly changes how information is passed among initialization functions and makes room for more flexibility. Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 24 +- arch/x86/kernel/setup_percpu.c | 38 ++- include/linux/percpu.h | 42 +++- mm/percpu.c | 529 +++++++++++++++++++++++++---------------- 4 files changed, 389 insertions(+), 244 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9856d866b77b..a42a4a744d14 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1475,17 +1475,29 @@ static void __init pcpu_map_range(unsigned long start, unsigned long end, void __init setup_per_cpu_areas(void) { - size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; static struct vm_struct vm; + struct pcpu_alloc_info *ai; unsigned long delta, cpu; size_t size_sum, pcpu_unit_size; size_t ptrs_size; void **ptrs; - size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); + + ai->static_size = __per_cpu_end - __per_cpu_start; + ai->reserved_size = PERCPU_MODULE_RESERVE; + + size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size + PERCPU_DYNAMIC_RESERVE); - dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; + ai->dyn_size = size_sum - ai->static_size - ai->reserved_size; + ai->unit_size = PCPU_CHUNK_SIZE; + ai->atom_size = PCPU_CHUNK_SIZE; + ai->alloc_size = PCPU_CHUNK_SIZE; + ai->groups[0].nr_units = nr_cpu_ids; + + for_each_possible_cpu(cpu) + ai->groups[0].cpu_map[cpu] = cpu; ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0])); ptrs = alloc_bootmem(ptrs_size); @@ -1497,7 +1509,7 @@ void __init setup_per_cpu_areas(void) free_bootmem(__pa(ptrs[cpu] + size_sum), PCPU_CHUNK_SIZE - size_sum); - memcpy(ptrs[cpu], __per_cpu_load, static_size); + memcpy(ptrs[cpu], __per_cpu_load, ai->static_size); } /* allocate address and map */ @@ -1514,9 +1526,7 @@ void __init setup_per_cpu_areas(void) pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(static_size, - PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); + pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); free_bootmem(__pa(ptrs), ptrs_size); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 660cde133141..db5f9c49fec5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -161,9 +161,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; - 
size_t unit_map_size, unit_size; - int *unit_map; - int nr_units; + struct pcpu_alloc_info *ai; ssize_t ret; /* on non-NUMA, embedding is better */ @@ -177,26 +175,22 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } /* allocate and build unit_map */ - unit_map_size = nr_cpu_ids * sizeof(int); - unit_map = alloc_bootmem_nopanic(unit_map_size); - if (!unit_map) { - pr_warning("PERCPU: failed to allocate unit_map\n"); - return -ENOMEM; + ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + PMD_SIZE, pcpu_lpage_cpu_distance); + if (IS_ERR(ai)) { + pr_warning("PERCPU: failed to build unit_map (%ld)\n", + PTR_ERR(ai)); + return PTR_ERR(ai); } - ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, - &dyn_size, &unit_size, PMD_SIZE, - unit_map, pcpu_lpage_cpu_distance); - if (ret < 0) { - pr_warning("PERCPU: failed to build unit_map\n"); - goto out_free; - } - nr_units = ret; - /* do the parameters look okay? */ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = nr_units * unit_size; + size_t tot_size = 0; + int group; + + for (group = 0; group < ai->nr_groups; group++) + tot_size += ai->unit_size * ai->groups[group].nr_units; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { @@ -207,12 +201,10 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } } - ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - unit_size, PMD_SIZE, unit_map, nr_units, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, + pcpul_map); out_free: - if (ret < 0) - free_bootmem(__pa(unit_map), unit_map_size); + pcpu_free_alloc_info(ai); return ret; } #else diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 570fb18de2ba..77b86be8ce4f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,6 +59,25 @@ extern void *pcpu_base_addr; extern const int *pcpu_unit_map; +struct pcpu_group_info { + int nr_units; /* aligned # of units */ + unsigned long base_offset; /* base address offset */ + unsigned int *cpu_map; /* unit->cpu map, empty + * entries contain NR_CPUS */ +}; + +struct pcpu_alloc_info { + size_t static_size; + size_t reserved_size; + size_t dyn_size; + size_t unit_size; + size_t atom_size; + size_t alloc_size; + size_t __ai_size; /* internal, don't use */ + int nr_groups; /* 0 if grouping unnecessary */ + struct pcpu_group_info groups[]; +}; + enum pcpu_fc { PCPU_FC_AUTO, PCPU_FC_EMBED, @@ -78,18 +97,17 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_build_unit_map( - size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, +extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, + int nr_units); +extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai); + +extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( + size_t reserved_size, ssize_t dyn_size, + size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); -#endif -extern size_t __init pcpu_setup_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - void *base_addr, const int *unit_map); +extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr); #ifdef 
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( @@ -106,9 +124,7 @@ extern ssize_t __init pcpu_page_first_chunk( #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern ssize_t __init pcpu_lpage_first_chunk( - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units, + const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 2b9c4b2a2fc0..99f7fa682722 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -58,6 +58,7 @@ #include #include +#include #include #include #include @@ -1245,53 +1246,108 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, return size_sum; } -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** - * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * pcpu_alloc_alloc_info - allocate percpu allocation info + * @nr_groups: the number of groups + * @nr_units: the number of units + * + * Allocate ai which is large enough for @nr_groups groups containing + * @nr_units units. The returned ai's groups[0].cpu_map points to the + * cpu_map array which is long enough for @nr_units and filled with + * NR_CPUS. It's the caller's responsibility to initialize cpu_map + * pointer of other groups. + * + * RETURNS: + * Pointer to the allocated pcpu_alloc_info on success, NULL on + * failure. + */ +struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, + int nr_units) +{ + struct pcpu_alloc_info *ai; + size_t base_size, ai_size; + void *ptr; + int unit; + + base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]), + __alignof__(ai->groups[0].cpu_map[0])); + ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); + + ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size)); + if (!ptr) + return NULL; + ai = ptr; + ptr += base_size; + + ai->groups[0].cpu_map = ptr; + + for (unit = 0; unit < nr_units; unit++) + ai->groups[0].cpu_map[unit] = NR_CPUS; + + ai->nr_groups = nr_groups; + ai->__ai_size = PFN_ALIGN(ai_size); + + return ai; +} + +/** + * pcpu_free_alloc_info - free percpu allocation info + * @ai: pcpu_alloc_info to free + * + * Free @ai which was allocated by pcpu_alloc_alloc_info(). + */ +void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) +{ + free_bootmem(__pa(ai), ai->__ai_size); +} + +/** + * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes - * @dyn_sizep: in/out parameter for dynamic size, -1 for auto - * @unit_sizep: out parameter for unit size - * @unit_map: unit_map to be filled - * @cpu_distance_fn: callback to determine distance between cpus + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @atom_size: allocation atom size + * @cpu_distance_fn: callback to determine distance between cpus, optional * - * This function builds cpu -> unit map and determine other parameters - * considering needed percpu size, large page size and distances - * between CPUs in NUMA. + * This function determines grouping of units, their mappings to cpus + * and other parameters considering needed percpu size, allocation + * atom size and distances between CPUs. * - * CPUs which are of LOCAL_DISTANCE both ways are grouped together and - * may share units in the same large page. The returned configuration - * is guaranteed to have CPUs on different nodes on different large - * pages and >=75% usage of allocated virtual address space. 
+ * Groups are always multiples of atom size and CPUs which are of + * LOCAL_DISTANCE both ways are grouped together and share space for + * units in the same group. The returned configuration is guaranteed + * to have CPUs on different nodes on different groups and >=75% usage + * of allocated virtual address space. * * RETURNS: - * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and - * returns the number of units to be allocated. -errno on failure. + * On success, pointer to the new allocation_info is returned. On + * failure, ERR_PTR value is returned. */ -int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +struct pcpu_alloc_info * __init pcpu_build_alloc_info( + size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; const size_t static_size = __per_cpu_end - __per_cpu_start; - int group_cnt_max = 0; + int group_cnt_max = 0, nr_groups = 1, nr_units = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ - int last_allocs; + int last_allocs, group, unit; unsigned int cpu, tcpu; - int group, unit; + struct pcpu_alloc_info *ai; + unsigned int *cpu_map; /* * Determine min_unit_size, alloc_size and max_upa such that - * alloc_size is multiple of lpage_size and is the smallest + * alloc_size is multiple of atom_size and is the smallest * which can accommodate 4k aligned segments which are equal to * or larger than min_unit_size. */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - alloc_size = roundup(min_unit_size, lpage_size); + alloc_size = roundup(min_unit_size, atom_size); upa = alloc_size / min_unit_size; while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) upa--; @@ -1304,10 +1360,11 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, for_each_possible_cpu(tcpu) { if (cpu == tcpu) break; - if (group_map[tcpu] == group && + if (group_map[tcpu] == group && cpu_distance_fn && (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { group++; + nr_groups = max(nr_groups, group + 1); goto next_group; } } @@ -1328,7 +1385,7 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) continue; - for (group = 0; group_cnt[group]; group++) { + for (group = 0; group < nr_groups; group++) { int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); allocs += this_allocs; wasted += this_allocs * upa - group_cnt[group]; @@ -1348,75 +1405,122 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, last_allocs = allocs; best_upa = upa; } - *unit_sizep = alloc_size / best_upa; + upa = best_upa; + + /* allocate and fill alloc_info */ + for (group = 0; group < nr_groups; group++) + nr_units += roundup(group_cnt[group], upa); + + ai = pcpu_alloc_alloc_info(nr_groups, nr_units); + if (!ai) + return ERR_PTR(-ENOMEM); + cpu_map = ai->groups[0].cpu_map; + + for (group = 0; group < nr_groups; group++) { + ai->groups[group].cpu_map = cpu_map; + cpu_map += roundup(group_cnt[group], upa); + } + + ai->static_size = static_size; +
ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = alloc_size / upa; + ai->atom_size = atom_size; + ai->alloc_size = alloc_size; + + for (group = 0, unit = 0; group_cnt[group]; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + + /* + * Initialize base_offset as if all groups are located + * back-to-back. The caller should update this to + * reflect actual allocation. + */ + gi->base_offset = unit * ai->unit_size; - /* assign units to cpus accordingly */ - unit = 0; - for (group = 0; group_cnt[group]; group++) { for_each_possible_cpu(cpu) if (group_map[cpu] == group) - unit_map[cpu] = unit++; - unit = roundup(unit, best_upa); + gi->cpu_map[gi->nr_units++] = cpu; + gi->nr_units = roundup(gi->nr_units, upa); + unit += gi->nr_units; } + BUG_ON(unit != nr_units); - return unit; /* unit contains aligned number of units */ + return ai; } -static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, - unsigned int *cpup); - -static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units) +/** + * pcpu_dump_alloc_info - print out information about pcpu_alloc_info + * @lvl: loglevel + * @ai: allocation info to dump + * + * Print out information about @ai using loglevel @lvl. + */ +static void pcpu_dump_alloc_info(const char *lvl, + const struct pcpu_alloc_info *ai) { - int width = 1, v = nr_units; + int group_width = 1, cpu_width = 1, width; char empty_str[] = "--------"; - int upl, lpl; /* units per lpage, lpage per line */ - unsigned int cpu; - int lpage, unit; + int alloc = 0, alloc_end = 0; + int group, v; + int upa, apl; /* units per alloc, allocs per line */ + + v = ai->nr_groups; + while (v /= 10) + group_width++; + v = num_possible_cpus(); while (v /= 10) - width++; - empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + cpu_width++; + empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0'; - upl = max_t(int, lpage_size / unit_size, 1); - lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + upa = ai->alloc_size / ai->unit_size; + width = upa * (cpu_width + 1) + group_width + 3; + apl = rounddown_pow_of_two(max(60 / width, 1)); - printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, - static_size, reserved_size, dyn_size, unit_size, lpage_size); + printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu", + lvl, ai->static_size, ai->reserved_size, ai->dyn_size, + ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size); - for (lpage = 0, unit = 0; unit < nr_units; unit++) { - if (!(unit % upl)) { - if (!(lpage++ % lpl)) { + for (group = 0; group < ai->nr_groups; group++) { + const struct pcpu_group_info *gi = &ai->groups[group]; + int unit = 0, unit_end = 0; + + BUG_ON(gi->nr_units % upa); + for (alloc_end += gi->nr_units / upa; + alloc < alloc_end; alloc++) { + if (!(alloc % apl)) { printk("\n"); - printk("%spcpu-lpage: ", lvl); - } else - printk("| "); + printk("%spcpu-alloc: ", lvl); + } + printk("[%0*d] ", group_width, group); + + for (unit_end += upa; unit < unit_end; unit++) + if (gi->cpu_map[unit] != NR_CPUS) + printk("%0*d ", cpu_width, + gi->cpu_map[unit]); + else + printk("%s ", empty_str); } - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) - printk("%0*d ", width, cpu); - else - printk("%s ", empty_str); } printk("\n"); } -#endif /** * pcpu_setup_first_chunk - initialize the first percpu chunk - * @static_size: the size of static percpu area in 
bytes - * @reserved_size: the size of reserved percpu area in bytes, 0 for none - * @dyn_size: free size for dynamic allocation in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE + * @ai: pcpu_alloc_info describing how the percpu area is shaped * @base_addr: mapped address - * @unit_map: cpu -> unit map, NULL for sequential mapping * * Initialize the first percpu chunk which contains the kernel static * percpu area. This function is to be called from arch percpu area * setup path. * - * @reserved_size, if non-zero, specifies the amount of bytes to + * @ai contains all information necessary to initialize the first + * chunk and prime the dynamic percpu allocator. + * + * @ai->static_size is the size of static percpu area. + * + * @ai->reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved * percpu allocation. This is primarily used to serve module percpu @@ -1424,13 +1528,26 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * - * @dyn_size determines the number of bytes available for dynamic - * allocation in the first chunk. The area between @static_size + - * @reserved_size + @dyn_size and @unit_size is unused. + * @ai->dyn_size determines the number of bytes available for dynamic + * allocation in the first chunk. The area between @ai->static_size + + * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused. * - * @unit_size specifies unit size and must be aligned to PAGE_SIZE and - * equal to or larger than @static_size + @reserved_size + if - * non-negative, @dyn_size. + * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE + * and equal to or larger than @ai->static_size + @ai->reserved_size + + * @ai->dyn_size. + * + * @ai->atom_size is the allocation atom size and used as alignment + * for vm areas. + * + * @ai->alloc_size is the allocation size and always multiple of + * @ai->atom_size. This is larger than @ai->atom_size if + * @ai->unit_size is larger than @ai->atom_size. + * + * @ai->nr_groups and @ai->groups describe virtual memory layout of + * percpu areas. Units which should be colocated are put into the + * same group. Dynamic VM areas will be allocated according to these + * groupings. If @ai->nr_groups is zero, a single group containing + * all units is assumed. * * The caller should have mapped the first chunk at @base_addr and * copied static data to each unit. @@ -1446,70 +1563,63 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access.
*/ -size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - void *base_addr, const int *unit_map) +size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; - size_t size_sum = static_size + reserved_size + dyn_size; + size_t dyn_size = ai->dyn_size; + size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; - unsigned int cpu, tcpu; - int i; + unsigned int cpu; + int *unit_map; + int group, unit, i; /* sanity checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); - BUG_ON(!static_size); + BUG_ON(ai->nr_groups <= 0); + BUG_ON(!ai->static_size); BUG_ON(!base_addr); - BUG_ON(unit_size < size_sum); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + BUG_ON(ai->unit_size < size_sum); + BUG_ON(ai->unit_size & ~PAGE_MASK); + BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); + + pcpu_dump_alloc_info(KERN_DEBUG, ai); /* determine number of units and verify and initialize pcpu_unit_map */ - if (unit_map) { - int first_unit = INT_MAX, last_unit = INT_MIN; - - for_each_possible_cpu(cpu) { - int unit = unit_map[cpu]; - - BUG_ON(unit < 0); - for_each_possible_cpu(tcpu) { - if (tcpu == cpu) - break; - /* the mapping should be one-to-one */ - BUG_ON(unit_map[tcpu] == unit); - } + unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); - if (unit < first_unit) { - pcpu_first_unit_cpu = cpu; - first_unit = unit; - } - if (unit > last_unit) { - pcpu_last_unit_cpu = cpu; - last_unit = unit; - } - } - pcpu_nr_units = last_unit + 1; - pcpu_unit_map = unit_map; - } else { - int *identity_map; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + unit_map[cpu] = NR_CPUS; + pcpu_first_unit_cpu = NR_CPUS; - /* #units == #cpus, identity mapped */ - identity_map = alloc_bootmem(nr_cpu_ids * - sizeof(identity_map[0])); + for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { + const struct pcpu_group_info *gi = &ai->groups[group]; - for_each_possible_cpu(cpu) - identity_map[cpu] = cpu; + for (i = 0; i < gi->nr_units; i++) { + cpu = gi->cpu_map[i]; + if (cpu == NR_CPUS) + continue; - pcpu_first_unit_cpu = 0; - pcpu_last_unit_cpu = pcpu_nr_units - 1; - pcpu_nr_units = nr_cpu_ids; - pcpu_unit_map = identity_map; + BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu)); + BUG_ON(unit_map[cpu] != NR_CPUS); + + unit_map[cpu] = unit + i; + if (pcpu_first_unit_cpu == NR_CPUS) + pcpu_first_unit_cpu = cpu; + } } + pcpu_last_unit_cpu = cpu; + pcpu_nr_units = unit; + + for_each_possible_cpu(cpu) + BUG_ON(unit_map[cpu] == NR_CPUS); + + pcpu_unit_map = unit_map; /* determine basic parameters */ - pcpu_unit_pages = unit_size >> PAGE_SHIFT; + pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + @@ -1543,17 +1653,17 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, schunk->immutable = true; bitmap_fill(schunk->populated, pcpu_unit_pages); - if (reserved_size) { - schunk->free_size = reserved_size; + if (ai->reserved_size) { + schunk->free_size = ai->reserved_size; pcpu_reserved_chunk = schunk; - pcpu_reserved_chunk_limit = static_size + reserved_size; + pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size; } else { schunk->free_size = dyn_size; dyn_size = 0; /* dynamic 
area covered */ } schunk->contig_hint = schunk->free_size; - schunk->map[schunk->map_used++] = -static_size; + schunk->map[schunk->map_used++] = -ai->static_size; if (schunk->free_size) schunk->map[schunk->map_used++] = schunk->free_size; @@ -1643,44 +1753,47 @@ early_param("percpu_alloc", percpu_alloc_setup); */ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { - const size_t static_size = __per_cpu_end - __per_cpu_start; - size_t size_sum, unit_size, chunk_size; + struct pcpu_alloc_info *ai; + size_t size_sum, chunk_size; void *base; - unsigned int cpu; + int unit; + ssize_t ret; - /* determine parameters and allocate */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); + if (IS_ERR(ai)) + return PTR_ERR(ai); + BUG_ON(ai->nr_groups != 1); + BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); - unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - chunk_size = unit_size * nr_cpu_ids; + size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; + chunk_size = ai->unit_size * num_possible_cpus(); base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); - return -ENOMEM; + ret = -ENOMEM; + goto out_free_ai; } /* return the leftover and copy */ - for (cpu = 0; cpu < nr_cpu_ids; cpu++) { - void *ptr = base + cpu * unit_size; - - if (cpu_possible(cpu)) { - free_bootmem(__pa(ptr + size_sum), - unit_size - size_sum); - memcpy(ptr, __per_cpu_load, static_size); - } else - free_bootmem(__pa(ptr), unit_size); + for (unit = 0; unit < num_possible_cpus(); unit++) { + void *ptr = base + unit * ai->unit_size; + + free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); + memcpy(ptr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", - PFN_DOWN(size_sum), base, static_size, reserved_size, dyn_size, - unit_size); + PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, + ai->dyn_size, ai->unit_size); - return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, base, NULL); + ret = pcpu_setup_first_chunk(ai, base); +out_free_ai: + pcpu_free_alloc_info(ai); + return ret; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || !CONFIG_HAVE_SETUP_PER_CPU_AREA */ @@ -1709,31 +1822,34 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; - const size_t static_size = __per_cpu_end - __per_cpu_start; - ssize_t dyn_size = -1; - size_t size_sum, unit_size; + struct pcpu_alloc_info *ai; char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; - unsigned int cpu; - int i, j; + int unit, i, j; ssize_t ret; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); - unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - unit_pages = unit_size >> PAGE_SHIFT; + ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL); + if (IS_ERR(ai)) + return PTR_ERR(ai); + BUG_ON(ai->nr_groups != 1); + BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); + + unit_pages = ai->unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); + pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * 
+ sizeof(pages[0])); pages = alloc_bootmem(pages_size); /* allocate pages */ j = 0; - for_each_possible_cpu(cpu) + for (unit = 0; unit < num_possible_cpus(); unit++) for (i = 0; i < unit_pages; i++) { + unsigned int cpu = ai->groups[0].cpu_map[unit]; void *ptr; ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); @@ -1747,18 +1863,18 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * unit_size; + vm.size = num_possible_cpus() * ai->unit_size; vm_area_register_early(&vm, PAGE_SIZE); - for_each_possible_cpu(cpu) { + for (unit = 0; unit < num_possible_cpus(); unit++) { unsigned long unit_addr = - (unsigned long)vm.addr + cpu * unit_size; + (unsigned long)vm.addr + unit * ai->unit_size; for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], + ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], unit_pages); if (ret < 0) panic("failed to map percpu area, err=%zd\n", ret); @@ -1772,16 +1888,15 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, */ /* copy static data */ - memcpy((void *)unit_addr, __per_cpu_load, static_size); + memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", - unit_pages, psize_str, vm.addr, static_size, reserved_size, - dyn_size); + unit_pages, psize_str, vm.addr, ai->static_size, + ai->reserved_size, ai->dyn_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: @@ -1790,6 +1905,7 @@ enomem: ret = -ENOMEM; out_free_ar: free_bootmem(__pa(pages), pages_size); + pcpu_free_alloc_info(ai); return ret; } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ @@ -1805,38 +1921,50 @@ static size_t pcpul_lpage_size; static int pcpul_nr_lpages; static struct pcpul_ent *pcpul_map; -static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, +static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, unsigned int *cpup) { - unsigned int cpu; + int group, cunit; - for_each_possible_cpu(cpu) - if (unit_map[cpu] == unit) { + for (group = 0, cunit = 0; group < ai->nr_groups; group++) { + const struct pcpu_group_info *gi = &ai->groups[group]; + + if (unit < cunit + gi->nr_units) { if (cpup) - *cpup = cpu; + *cpup = gi->cpu_map[unit - cunit]; return true; } + cunit += gi->nr_units; + } return false; } +static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) +{ + int group, unit, i; + + for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { + const struct pcpu_group_info *gi = &ai->groups[group]; + + for (i = 0; i < gi->nr_units; i++) + if (gi->cpu_map[i] == cpu) + return unit + i; + } + BUG(); +} + /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @reserved_size: the size of reserved percpu area in bytes - * @dyn_size: free size for dynamic allocation in bytes - * @unit_size: unit size in bytes - * @lpage_size: the size of a large page - * @unit_map: cpu -> unit mapping - * @nr_units: the number of units + * @ai: pcpu_alloc_info * @alloc_fn: function to allocate percpu lpage, always called with lpage_size * @free_fn: function to free percpu memory, @size <= lpage_size * @map_fn: function to map percpu lpage, always called 
with lpage_size * * This allocator uses large page to build and map the first chunk. - * Unlike other helpers, the caller should always specify @dyn_size - * and @unit_size. These parameters along with @unit_map and - * @nr_units can be determined using pcpu_lpage_build_unit_map(). - * This two stage initialization is to allow arch code to evaluate the + * Unlike other helpers, the caller should provide fully initialized + * @ai. This can be done using pcpu_build_alloc_info(). This two + * stage initialization is to allow arch code to evaluate the * parameters before committing to it. * * Large pages are allocated as directed by @unit_map and other @@ -1852,27 +1980,26 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units, +ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; - const size_t static_size = __per_cpu_end - __per_cpu_start; - size_t chunk_size = unit_size * nr_units; - size_t map_size; + const size_t lpage_size = ai->atom_size; + size_t chunk_size, map_size; unsigned int cpu; ssize_t ret; - int i, j, unit; + int i, j, unit, nr_units; - pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, - unit_size, lpage_size, unit_map, nr_units); + nr_units = 0; + for (i = 0; i < ai->nr_groups; i++) + nr_units += ai->groups[i].nr_units; + chunk_size = ai->unit_size * nr_units; BUG_ON(chunk_size % lpage_size); - pcpul_size = static_size + reserved_size + dyn_size; + pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; pcpul_lpage_size = lpage_size; pcpul_nr_lpages = chunk_size / lpage_size; @@ -1883,13 +2010,13 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* allocate all pages */ for (i = 0; i < pcpul_nr_lpages; i++) { size_t offset = i * lpage_size; - int first_unit = offset / unit_size; - int last_unit = (offset + lpage_size - 1) / unit_size; + int first_unit = offset / ai->unit_size; + int last_unit = (offset + lpage_size - 1) / ai->unit_size; void *ptr; /* find out which cpu is mapped to this unit */ for (unit = first_unit; unit <= last_unit; unit++) - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + if (pcpul_unit_to_cpu(unit, ai, &cpu)) goto found; continue; found: @@ -1905,12 +2032,12 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* return unused holes */ for (unit = 0; unit < nr_units; unit++) { - size_t start = unit * unit_size; - size_t end = start + unit_size; + size_t start = unit * ai->unit_size; + size_t end = start + ai->unit_size; size_t off, next; /* don't free used part of occupied unit */ - if (pcpul_unit_to_cpu(unit, unit_map, NULL)) + if (pcpul_unit_to_cpu(unit, ai, NULL)) start += pcpul_size; /* unit can span more than one page, punch the holes */ @@ -1925,7 +2052,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* allocate address, map and copy */ vm.flags = VM_ALLOC; vm.size = chunk_size; - vm_area_register_early(&vm, unit_size); + vm_area_register_early(&vm, ai->unit_size); for (i = 0; i < pcpul_nr_lpages; i++) { if (!pcpul_map[i].ptr) @@ -1935,15 +2062,15 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t 
dyn_size, } for_each_possible_cpu(cpu) - memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, - static_size); + memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, + __per_cpu_load, ai->static_size); /* we're ready, commit */ pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", - vm.addr, static_size, reserved_size, dyn_size, unit_size); + vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, + ai->unit_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, vm.addr, unit_map); + ret = pcpu_setup_first_chunk(ai, vm.addr); /* * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped -- cgit v1.2.3 From fb435d5233f8b6f9b93c11d6304d8e98fed03234 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: add pcpu_unit_offsets[] Currently units are mapped sequentially into address space. This patch adds pcpu_unit_offsets[] which allows units to be mapped to arbitrary offsets from the chunk base address. This is necessary to allow sparse embedding which might need to allocate address ranges and memory areas which aren't aligned to unit size but to allocation atom size (page or large page size). This also simplifies things a bit by removing the need to calculate offset from unit number. With this change, there's no need for the arch code to know pcpu_unit_size. Update pcpu_setup_first_chunk() and first chunk allocators to return a regular 0 or -errno return code instead of unit size or -errno. Signed-off-by: Tejun Heo Cc: David S. Miller --- arch/sparc/kernel/smp_64.c | 12 +++--- arch/x86/kernel/setup_percpu.c | 51 ++++++++++------------- include/linux/percpu.h | 16 ++++--- mm/percpu.c | 95 +++++++++++++++++++++--------------------- 4 files changed, 84 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a42a4a744d14..b03fd362c629 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1478,9 +1478,10 @@ void __init setup_per_cpu_areas(void) static struct vm_struct vm; struct pcpu_alloc_info *ai; unsigned long delta, cpu; - size_t size_sum, pcpu_unit_size; + size_t size_sum; size_t ptrs_size; void **ptrs; + int rc; ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); @@ -1526,14 +1527,15 @@ void __init setup_per_cpu_areas(void) pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); + if (rc) + panic("failed to setup percpu first chunk (%d)", rc); free_bootmem(__pa(ptrs), ptrs_size); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) { - __per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; - } + for_each_possible_cpu(cpu) + __per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; /* Setup %g5 for the boot cpu.
*/ __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index db5f9c49fec5..9becc5d4b518 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,12 +157,12 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; struct pcpu_alloc_info *ai; - ssize_t ret; + int rc; /* on non-NUMA, embedding is better */ if (!chosen && !pcpu_need_numa()) @@ -196,19 +196,18 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - ret = -EINVAL; + rc = -EINVAL; goto out_free; } } - ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, - pcpul_map); + rc = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: pcpu_free_alloc_info(ai); - return ret; + return rc; } #else -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -222,7 +221,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. */ -static ssize_t __init setup_pcpu_embed(bool chosen) +static int __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -250,7 +249,7 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(void) +static int __init setup_pcpu_page(void) { return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, @@ -274,8 +273,7 @@ void __init setup_per_cpu_areas(void) { unsigned int cpu; unsigned long delta; - size_t pcpu_unit_size; - ssize_t ret; + int rc; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -285,36 +283,33 @@ void __init setup_per_cpu_areas(void) * of large page mappings. Please read comments on top of * each allocator for details. 
*/ - ret = -EINVAL; + rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(true); + rc = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(true); + rc = setup_pcpu_embed(true); - if (ret < 0) - pr_warning("PERCPU: %s allocator failed (%zd), " + if (rc < 0) + pr_warning("PERCPU: %s allocator failed (%d), " "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], ret); + pcpu_fc_names[pcpu_chosen_fc], rc); } } else { - ret = setup_pcpu_lpage(false); - if (ret < 0) - ret = setup_pcpu_embed(false); + rc = setup_pcpu_lpage(false); + if (rc < 0) + rc = setup_pcpu_embed(false); } - if (ret < 0) - ret = setup_pcpu_page(); - if (ret < 0) - panic("cannot initialize percpu area (err=%zd)", ret); - - pcpu_unit_size = ret; + if (rc < 0) + rc = setup_pcpu_page(); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = - delta + pcpu_unit_map[cpu] * pcpu_unit_size; + per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 77b86be8ce4f..a7ec840f596c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -57,7 +57,7 @@ #endif extern void *pcpu_base_addr; -extern const int *pcpu_unit_map; +extern const unsigned long *pcpu_unit_offsets; struct pcpu_group_info { int nr_units; /* aligned # of units */ @@ -106,25 +106,23 @@ extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); -extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, - void *base_addr); +extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK -extern ssize_t __init pcpu_embed_first_chunk( - size_t reserved_size, ssize_t dyn_size); +extern int __init pcpu_embed_first_chunk(size_t reserved_size, + ssize_t dyn_size); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK -extern ssize_t __init pcpu_page_first_chunk( - size_t reserved_size, +extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern ssize_t __init pcpu_lpage_first_chunk( - const struct pcpu_alloc_info *ai, +extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 99f7fa682722..653b02c40200 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -117,8 +117,8 @@ static unsigned int pcpu_last_unit_cpu __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* cpu -> unit map */ -const int *pcpu_unit_map __read_mostly; +static const int *pcpu_unit_map __read_mostly; /* cpu -> unit */ +const unsigned long *pcpu_unit_offsets __read_mostly; /* cpu -> unit offset */ /* * The first chunk which always exists. 
Note that unlike other @@ -196,8 +196,8 @@ static int pcpu_page_idx(unsigned int cpu, int page_idx) static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, unsigned int cpu, int page_idx) { - return (unsigned long)chunk->vm->addr + - (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); + return (unsigned long)chunk->vm->addr + pcpu_unit_offsets[cpu] + + (page_idx << PAGE_SHIFT); } static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, @@ -341,7 +341,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * space. Note that any possible cpu id can be used here, so * there's no need to worry about preemption or cpu hotplug. */ - addr += pcpu_unit_map[smp_processor_id()] * pcpu_unit_size; + addr += pcpu_unit_offsets[smp_processor_id()]; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } @@ -1560,17 +1560,17 @@ static void pcpu_dump_alloc_info(const char *lvl, * and available for dynamic allocation like any other chunks. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access. + * 0 on success, -errno on failure. */ -size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, - void *base_addr) +int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; size_t dyn_size = ai->dyn_size; size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; + unsigned long *unit_off; unsigned int cpu; int *unit_map; int group, unit, i; @@ -1587,8 +1587,9 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_dump_alloc_info(KERN_DEBUG, ai); - /* determine number of units and verify and initialize pcpu_unit_map */ + /* determine number of units and initialize unit_map and base */ unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); + unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0])); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = NR_CPUS; @@ -1606,6 +1607,8 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, BUG_ON(unit_map[cpu] != NR_CPUS); unit_map[cpu] = unit + i; + unit_off[cpu] = gi->base_offset + i * ai->unit_size; + if (pcpu_first_unit_cpu == NR_CPUS) pcpu_first_unit_cpu = cpu; } @@ -1617,6 +1620,7 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, BUG_ON(unit_map[cpu] == NR_CPUS); pcpu_unit_map = unit_map; + pcpu_unit_offsets = unit_off; /* determine basic parameters */ pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; @@ -1688,7 +1692,7 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* we're done */ pcpu_base_addr = schunk->vm->addr; - return pcpu_unit_size; + return 0; } const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { @@ -1748,16 +1752,15 @@ early_param("percpu_alloc", percpu_alloc_setup); * size, the leftover is returned to the bootmem allocator. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. 
*/ -ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) +int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { struct pcpu_alloc_info *ai; size_t size_sum, chunk_size; void *base; int unit; - ssize_t ret; + int rc; ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); if (IS_ERR(ai)) @@ -1773,7 +1776,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); - ret = -ENOMEM; + rc = -ENOMEM; goto out_free_ai; } @@ -1790,10 +1793,10 @@ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); - ret = pcpu_setup_first_chunk(ai, base); + rc = pcpu_setup_first_chunk(ai, base); out_free_ai: pcpu_free_alloc_info(ai); - return ret; + return rc; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || !CONFIG_HAVE_SETUP_PER_CPU_AREA */ @@ -1813,13 +1816,12 @@ out_free_ai: * page-by-page into vmalloc area. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. */ -ssize_t __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +int __init pcpu_page_first_chunk(size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; struct pcpu_alloc_info *ai; @@ -1827,8 +1829,7 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, int unit_pages; size_t pages_size; struct page **pages; - int unit, i, j; - ssize_t ret; + int unit, i, j, rc; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); @@ -1874,10 +1875,10 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], - unit_pages); - if (ret < 0) - panic("failed to map percpu area, err=%zd\n", ret); + rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], + unit_pages); + if (rc < 0) + panic("failed to map percpu area, err=%d\n", rc); /* * FIXME: Archs with virtual cache should flush local @@ -1896,17 +1897,17 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, unit_pages, psize_str, vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size); - ret = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: while (--j >= 0) free_fn(page_address(pages[j]), PAGE_SIZE); - ret = -ENOMEM; + rc = -ENOMEM; out_free_ar: free_bootmem(__pa(pages), pages_size); pcpu_free_alloc_info(ai); - return ret; + return rc; } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ @@ -1977,20 +1978,18 @@ static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) * pcpu_lpage_remapped(). * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. 
*/ -ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) +int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; const size_t lpage_size = ai->atom_size; size_t chunk_size, map_size; unsigned int cpu; - ssize_t ret; - int i, j, unit, nr_units; + int i, j, unit, nr_units, rc; nr_units = 0; for (i = 0; i < ai->nr_groups; i++) @@ -2070,7 +2069,7 @@ ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); - ret = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); /* * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped @@ -2094,7 +2093,7 @@ ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) pcpul_nr_lpages--; - return ret; + return rc; enomem: for (i = 0; i < pcpul_nr_lpages; i++) @@ -2166,21 +2165,21 @@ EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { - ssize_t unit_size; unsigned long delta; unsigned int cpu; + int rc; /* * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - unit_size = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); - if (unit_size < 0) + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); + if (rc < 0) panic("Failed to initialized percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) - __per_cpu_offset[cpu] = delta + cpu * unit_size; + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ -- cgit v1.2.3 From ca23e405e06d5fffb005df004c72781f76062f51 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: vmalloc: implement pcpu_get_vm_areas() To directly use spread NUMA memories for percpu units, percpu allocator will be updated to allow sparsely mapping units in a chunk. As the distances between units can be very large, this makes allocating a single vmap area for each chunk undesirable. This patch implements pcpu_get_vm_areas() and pcpu_free_vm_areas() which allocate and free sparse congruent vmap areas. pcpu_get_vm_areas() takes @offsets and @sizes arrays which define the distances and sizes of the vmap areas. It scans down from the top of the vmalloc area looking for the top-most address which can accommodate all the areas. The top-down scan is to avoid interacting with regular vmallocs, which can push these congruent areas up little by little, ending up wasting address space and page tables. To speed up the top-down scan, the highest possible address hint is maintained. Although the scan is linear from the hint, given the usual large holes between memory addresses of different NUMA nodes, the scanning is highly likely to finish after finding the first hole for the last unit, which is scanned first.
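To make the search strategy concrete, here is a minimal userspace C sketch of the same "pull the base down below the blocker and recheck" loop. It is an illustration only: the names (top_down_fit, struct range) are made up, the busy set is a plain sorted array rather than the kernel's vmap_area rbtree, and the address hint, lazy purging and locking of the real implementation are all omitted. The sketch also assumes offsets[] is ascending, so the last area has the highest end address; the kernel verifies this separately.

#include <stdbool.h>
#include <stdio.h>

/* an occupied address range [start, end) */
struct range { long long start, end; };

static bool overlaps(long long s, long long e, const struct range *r)
{
	return s < r->end && r->start < e;
}

/*
 * Scan downwards from @hi for the highest @base such that every area
 * [base + offsets[i], base + offsets[i] + sizes[i]) avoids all @busy
 * ranges.  Whenever an area collides, @base is pulled down just far
 * enough that the colliding area ends at the blocker's start and the
 * whole set is rechecked.  Returns -1 if nothing fits above @lo.
 */
static long long top_down_fit(const struct range *busy, int nr_busy,
			      const long long *offsets, const long long *sizes,
			      int nr_areas, long long lo, long long hi)
{
	long long base = hi - (offsets[nr_areas - 1] + sizes[nr_areas - 1]);

	while (base >= lo) {
		bool moved = false;

		for (int i = 0; i < nr_areas && !moved; i++)
			for (int j = 0; j < nr_busy; j++)
				if (overlaps(base + offsets[i],
					     base + offsets[i] + sizes[i],
					     &busy[j])) {
					/* pull the set down below the blocker */
					base = busy[j].start - sizes[i] - offsets[i];
					moved = true;
					break;
				}
		if (!moved)
			return base;	/* every area fits */
	}
	return -1;
}

int main(void)
{
	/* two busy ranges inside a [0x1000, 0x10000) "vmalloc" window */
	struct range busy[] = { { 0x4000, 0x5000 }, { 0xe000, 0xf000 } };
	long long offsets[] = { 0x0000, 0x8000 };	/* congruent layout */
	long long sizes[]   = { 0x2000, 0x2000 };
	long long base = top_down_fit(busy, 2, offsets, sizes, 2,
				      0x1000, 0x10000);

	if (base < 0) {
		puts("no fit");
		return 1;
	}
	printf("base = %#llx\n", base);
	return 0;
}

Run against the sample data, the loop settles on base 0x2000: the second area first collides with the range at 0xe000 and pulls the base to 0x4000, where the first area then collides with the range at 0x4000 and pulls it down once more.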
Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 6 + mm/vmalloc.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 299 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a43ebec3a7b9..227c2a585e4f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,4 +115,10 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, + const size_t *sizes, int nr_vms, + size_t align, gfp_t gfp_mask); + +void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); + #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2eb461c3a46e..204b8243d8ab 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -265,6 +265,7 @@ struct vmap_area { static DEFINE_SPINLOCK(vmap_area_lock); static struct rb_root vmap_area_root = RB_ROOT; static LIST_HEAD(vmap_area_list); +static unsigned long vmap_area_pcpu_hole; static struct vmap_area *__find_vmap_area(unsigned long addr) { @@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va) RB_CLEAR_NODE(&va->rb_node); list_del_rcu(&va->list); + /* + * Track the highest possible candidate for pcpu area + * allocation. Areas outside of vmalloc area can be returned + * here too, consider only end addresses which fall inside + * vmalloc area proper. + */ + if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) + vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); + call_rcu(&va->rcu_head, rcu_free_va); } @@ -1038,6 +1048,9 @@ void __init vmalloc_init(void) va->va_end = va->va_start + tmp->size; __insert_vmap_area(va); } + + vmap_area_pcpu_hole = VMALLOC_END; + vmap_initialized = true; } @@ -1821,6 +1834,286 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +static struct vmap_area *node_to_va(struct rb_node *n) +{ + return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; +} + +/** + * pvm_find_next_prev - find the next and prev vmap_area surrounding @end + * @end: target address + * @pnext: out arg for the next vmap_area + * @pprev: out arg for the previous vmap_area + * + * Returns: %true if either or both of next and prev are found, + * %false if no vmap_area exists + * + * Find vmap_areas end addresses of which enclose @end. ie. if not + * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. + */ +static bool pvm_find_next_prev(unsigned long end, + struct vmap_area **pnext, + struct vmap_area **pprev) +{ + struct rb_node *n = vmap_area_root.rb_node; + struct vmap_area *va = NULL; + + while (n) { + va = rb_entry(n, struct vmap_area, rb_node); + if (end < va->va_end) + n = n->rb_left; + else if (end > va->va_end) + n = n->rb_right; + else + break; + } + + if (!va) + return false; + + if (va->va_end > end) { + *pnext = va; + *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + } else { + *pprev = va; + *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); + } + return true; +} + +/** + * pvm_determine_end - find the highest aligned address between two vmap_areas + * @pnext: in/out arg for the next vmap_area + * @pprev: in/out arg for the previous vmap_area + * @align: alignment + * + * Returns: determined end address + * + * Find the highest aligned address between *@pnext and *@pprev below + * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned + * down address is between the end addresses of the two vmap_areas. 
+ * + * Please note that the address returned by this function may fall + * inside *@pnext vmap_area. The caller is responsible for checking + * that. + */ +static unsigned long pvm_determine_end(struct vmap_area **pnext, + struct vmap_area **pprev, + unsigned long align) +{ + const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + unsigned long addr; + + if (*pnext) + addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); + else + addr = vmalloc_end; + + while (*pprev && (*pprev)->va_end > addr) { + *pnext = *pprev; + *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + } + + return addr; +} + +/** + * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator + * @offsets: array containing offset of each area + * @sizes: array containing size of each area + * @nr_vms: the number of areas to allocate + * @align: alignment, all entries in @offsets and @sizes must be aligned to this + * @gfp_mask: allocation mask + * + * Returns: kmalloc'd vm_struct pointer array pointing to allocated + * vm_structs on success, %NULL on failure + * + * Percpu allocator wants to use congruent vm areas so that it can + * maintain the offsets among percpu areas. This function allocates + * congruent vmalloc areas for it. These areas tend to be scattered + * pretty far, distance between two areas easily going up to + * gigabytes. To avoid interacting with regular vmallocs, these areas + * are allocated from top. + * + * Despite its complicated look, this allocator is rather simple. It + * does everything top-down and scans areas from the end looking for + * matching slot. While scanning, if any of the areas overlaps with + * existing vmap_area, the base address is pulled down to fit the + * area. Scanning is repeated till all the areas fit and then all + * necessary data structures are inserted and the result is returned. + */ +struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, + const size_t *sizes, int nr_vms, + size_t align, gfp_t gfp_mask) +{ + const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); + const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + struct vmap_area **vas, *prev, *next; + struct vm_struct **vms; + int area, area2, last_area, term_area; + unsigned long base, start, end, last_end; + bool purged = false; + + gfp_mask &= GFP_RECLAIM_MASK; + + /* verify parameters and allocate data structures */ + BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); + for (last_area = 0, area = 0; area < nr_vms; area++) { + start = offsets[area]; + end = start + sizes[area]; + + /* is everything aligned properly?
*/ + BUG_ON(!IS_ALIGNED(offsets[area], align)); + BUG_ON(!IS_ALIGNED(sizes[area], align)); + + /* detect the area with the highest address */ + if (start > offsets[last_area]) + last_area = area; + + for (area2 = 0; area2 < nr_vms; area2++) { + unsigned long start2 = offsets[area2]; + unsigned long end2 = start2 + sizes[area2]; + + if (area2 == area) + continue; + + BUG_ON(start2 >= start && start2 < end); + BUG_ON(end2 <= end && end2 > start); + } + } + last_end = offsets[last_area] + sizes[last_area]; + + if (vmalloc_end - vmalloc_start < last_end) { + WARN_ON(true); + return NULL; + } + + vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); + vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); + if (!vas || !vms) + goto err_free; + + for (area = 0; area < nr_vms; area++) { + vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); + vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); + if (!vas[area] || !vms[area]) + goto err_free; + } +retry: + spin_lock(&vmap_area_lock); + + /* start scanning - we scan from the top, begin with the last area */ + area = term_area = last_area; + start = offsets[area]; + end = start + sizes[area]; + + if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { + base = vmalloc_end - last_end; + goto found; + } + base = pvm_determine_end(&next, &prev, align) - end; + + while (true) { + BUG_ON(next && next->va_end <= base + end); + BUG_ON(prev && prev->va_end > base + end); + + /* + * base might have underflowed, add last_end before + * comparing. + */ + if (base + last_end < vmalloc_start + last_end) { + spin_unlock(&vmap_area_lock); + if (!purged) { + purge_vmap_area_lazy(); + purged = true; + goto retry; + } + goto err_free; + } + + /* + * If next overlaps, move base downwards so that it's + * right below next and then recheck. + */ + if (next && next->va_start < base + end) { + base = pvm_determine_end(&next, &prev, align) - end; + term_area = area; + continue; + } + + /* + * If prev overlaps, shift down next and prev and move + * base so that it's right below new next and then + * recheck. + */ + if (prev && prev->va_end > base + start) { + next = prev; + prev = node_to_va(rb_prev(&next->rb_node)); + base = pvm_determine_end(&next, &prev, align) - end; + term_area = area; + continue; + } + + /* + * This area fits, move on to the previous one. If + * the previous one is the terminal one, we're done. + */ + area = (area + nr_vms - 1) % nr_vms; + if (area == term_area) + break; + start = offsets[area]; + end = start + sizes[area]; + pvm_find_next_prev(base + end, &next, &prev); + } +found: + /* we've found a fitting base, insert all va's */ + for (area = 0; area < nr_vms; area++) { + struct vmap_area *va = vas[area]; + + va->va_start = base + offsets[area]; + va->va_end = va->va_start + sizes[area]; + __insert_vmap_area(va); + } + + vmap_area_pcpu_hole = base + offsets[last_area]; + + spin_unlock(&vmap_area_lock); + + /* insert all vm's */ + for (area = 0; area < nr_vms; area++) + insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, + pcpu_get_vm_areas); + + kfree(vas); + return vms; + +err_free: + for (area = 0; area < nr_vms; area++) { + if (vas) + kfree(vas[area]); + if (vms) + kfree(vms[area]); + } + kfree(vas); + kfree(vms); + return NULL; +} + +/** + * pcpu_free_vm_areas - free vmalloc areas for percpu allocator + * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() + * @nr_vms: the number of allocated areas + * + * Free vm_structs and the array allocated by pcpu_get_vm_areas(). 
+ */ +void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) +{ + int i; + + for (i = 0; i < nr_vms; i++) + free_vm_area(vms[i]); + kfree(vms); +} #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) -- cgit v1.2.3 From c8826dd538602d730ed2c18c6753f1bbfa6c4933 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: percpu: update embedding first chunk allocator to handle sparse units Now that percpu core can handle very sparse units, given that vmalloc space is large enough, embedding first chunk allocator can use any memory to build the first chunk. This patch teaches pcpu_embed_first_chunk() about distances between cpus and to use alloc/free callbacks to allocate node specific areas for each group and use them for the first chunk. This brings the benefits of embedding allocator to NUMA configurations - no extra TLB pressure with the flexibility of unified dynamic allocator and no need to restructure arch code to build memory layout suitable for percpu. With units put into atom_size aligned groups according to cpu distances, using large page for dynamic chunks is also easily possible with falling back to regular pages if large allocation fails. Embedding allocator users are converted to specify NULL cpu_distance_fn, so this patch doesn't cause any visible behavior difference. Following patches will convert them. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 +- include/linux/percpu.h | 7 ++- mm/percpu.c | 113 +++++++++++++++++++++++++++++++---------- 3 files changed, 93 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9becc5d4b518..67f6314de9f1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -234,7 +234,9 @@ static int __init setup_pcpu_embed(bool chosen) return -EINVAL; return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE); + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PAGE_SIZE, NULL, pcpu_fc_alloc, + pcpu_fc_free); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a7ec840f596c..25359932740e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -110,8 +110,11 @@ extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, void *base_addr); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK -extern int __init pcpu_embed_first_chunk(size_t reserved_size, - ssize_t dyn_size); +extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK diff --git a/mm/percpu.c b/mm/percpu.c index cc9c4c64606d..c2826d05505c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1747,15 +1747,25 @@ early_param("percpu_alloc", percpu_alloc_setup); * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @atom_size: allocation atom size + * @cpu_distance_fn: callback to determine distance between cpus, optional + * @alloc_fn: function to allocate percpu page + * @free_fn: function to free percpu page * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected.
* * If this function is used to setup the first chunk, it is allocated - * as a contiguous area using bootmem allocator and used as-is without - * being mapped into vmalloc area. This enables the first chunk to - * piggy back on the linear physical mapping which often uses larger - * page size. + * by calling @alloc_fn and used as-is without being mapped into + * vmalloc area. Allocations are always whole multiples of @atom_size + * aligned to @atom_size. + * + * This enables the first chunk to piggy back on the linear physical + * mapping which often uses larger page size. Please note that this + * can result in very sparse cpu->unit mapping on NUMA machines thus + * requiring large vmalloc address space. Don't use this allocator if + * vmalloc space is not orders of magnitude larger than distances + * between node memory addresses (ie. 32bit NUMA machines). * * When @dyn_size is positive, dynamic area might be larger than * specified to fill page alignment. When @dyn_size is auto, @@ -1763,53 +1773,88 @@ early_param("percpu_alloc", percpu_alloc_setup); * and reserved areas. * * If the needed size is smaller than the minimum or specified unit - * size, the leftover is returned to the bootmem allocator. + * size, the leftover is returned using @free_fn. * * RETURNS: * 0 on success, -errno on failure. */ -int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) +int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn) { + void *base = (void *)ULONG_MAX; + void **areas = NULL; struct pcpu_alloc_info *ai; - size_t size_sum, chunk_size; - void *base; - int unit; - int rc; + size_t size_sum, areas_size; + int group, i, rc; - ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); + ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, + cpu_distance_fn); if (IS_ERR(ai)) return PTR_ERR(ai); - BUG_ON(ai->nr_groups != 1); - BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; - chunk_size = ai->unit_size * num_possible_cpus(); + areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!base) { - pr_warning("PERCPU: failed to allocate %zu bytes for " - "embedding\n", chunk_size); + areas = alloc_bootmem_nopanic(areas_size); + if (!areas) { rc = -ENOMEM; - goto out_free_ai; + goto out_free; } - /* return the leftover and copy */ - for (unit = 0; unit < num_possible_cpus(); unit++) { - void *ptr = base + unit * ai->unit_size; + /* allocate, copy and determine base address */ + for (group = 0; group < ai->nr_groups; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + unsigned int cpu = NR_CPUS; + void *ptr; + + for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++) + cpu = gi->cpu_map[i]; + BUG_ON(cpu == NR_CPUS); + + /* allocate space for the whole group */ + ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size); + if (!ptr) { + rc = -ENOMEM; + goto out_free_areas; + } + areas[group] = ptr; - free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); - memcpy(ptr, __per_cpu_load, ai->static_size); + base = min(ptr, base); + + for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) { + if (gi->cpu_map[i] == NR_CPUS) { + /* unused unit, free whole */ + free_fn(ptr, ai->unit_size); + continue; + } + /* copy and return the unused part */ + memcpy(ptr, 
__per_cpu_load, ai->static_size); + free_fn(ptr + size_sum, ai->unit_size - size_sum); + } } - /* we're ready, commit */ + /* base address is now known, determine group base offsets */ + for (group = 0; group < ai->nr_groups; group++) + ai->groups[group].base_offset = areas[group] - base; + pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); rc = pcpu_setup_first_chunk(ai, base); -out_free_ai: + goto out_free; + +out_free_areas: + for (group = 0; group < ai->nr_groups; group++) + free_fn(areas[group], + ai->groups[group].nr_units * ai->unit_size); +out_free: pcpu_free_alloc_info(ai); + if (areas) + free_bootmem(__pa(areas), areas_size); return rc; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || @@ -2177,6 +2222,17 @@ void *pcpu_lpage_remapped(void *kaddr) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); +} + +static void __init pcpu_dfl_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + void __init setup_per_cpu_areas(void) { unsigned long delta; @@ -2188,7 +2244,8 @@ void __init setup_per_cpu_areas(void) * what the legacy allocator did. */ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, + pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From e933a73f48e3b2d40cfa56d81e2646f194b5a66a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:53 +0900 Subject: percpu: kill lpage first chunk allocator With x86 converted to embedding allocator, lpage doesn't have any user left. Kill it along with cpa handling code. Signed-off-by: Tejun Heo Cc: Jan Beulich --- Documentation/kernel-parameters.txt | 10 +- arch/x86/mm/pageattr.c | 20 +-- include/linux/percpu.h | 16 --- mm/percpu.c | 241 ------------------------------------ 4 files changed, 6 insertions(+), 281 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index dee9ce2e6cfa..e710093e3d32 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1920,11 +1920,11 @@ and is between 256 and 4096 characters. It is defined in the file See arch/parisc/kernel/pdc_chassis.c percpu_alloc= Select which percpu first chunk allocator to use. - Currently supported values are "embed", "page" and - "lpage". Archs may support subset or none of the - selections. See comments in mm/percpu.c for details - on each allocator. This parameter is primarily for - debugging and performance comparison. + Currently supported values are "embed" and "page". + Archs may support subset or none of the selections. + See comments in mm/percpu.c for details on each + allocator. This parameter is primarily for debugging + and performance comparison. pf. [PARIDE] See Documentation/blockdev/paride.txt. 
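As a concrete illustration of the pcpu_embed_first_chunk() interface introduced above: once an arch passes a real cpu_distance_fn, the alloc/free callbacks can keep each group's memory on its own node. The sketch below is an assumption modeled on the x86 defaults shown earlier, not code from these patches; pcpu_numa_alloc, pcpu_numa_free and the choice of PMD_SIZE atoms are illustrative.

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	/* cpus on the same node are near, everything else is remote */
	if (early_cpu_to_node(from) == early_cpu_to_node(to))
		return LOCAL_DISTANCE;
	return REMOTE_DISTANCE;
}

static void * __init pcpu_numa_alloc(unsigned int cpu, size_t size,
				     size_t align)
{
	/* back each group with bootmem from the cpu's own node */
	return __alloc_bootmem_node_nopanic(NODE_DATA(early_cpu_to_node(cpu)),
					    size, align, __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_numa_free(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

	/* e.g. from setup_per_cpu_areas(), with 2MB atoms for large pages */
	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
				    pcpu_cpu_distance,
				    pcpu_numa_alloc, pcpu_numa_free);

With a non-NULL cpu_distance_fn, pcpu_build_alloc_info() packs nearby cpus into atom_size aligned groups, which is what makes the large-page fallback mentioned in the changelog possible.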
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dce282f65700..f53cfc7f963d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr, remapped; + unsigned long vaddr; int ret; if (cpa->pfn >= max_pfn_mapped) @@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa) } #endif - /* - * If the PMD page was partially used for per-cpu remapping, - * the recycled area needs to be split and modified. Because - * the area is always proper subset of a PMD page - * cpa->numpages is guaranteed to be 1 for these areas, so - * there's no need to loop over and check for further remaps. - */ - remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); - if (remapped) { - WARN_ON(cpa->numpages > 1); - alias_cpa = *cpa; - alias_cpa.vaddr = &remapped; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - return 0; } diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 25359932740e..878836ca999c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -82,7 +82,6 @@ enum pcpu_fc { PCPU_FC_AUTO, PCPU_FC_EMBED, PCPU_FC_PAGE, - PCPU_FC_LPAGE, PCPU_FC_NR, }; @@ -95,7 +94,6 @@ typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); -typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, int nr_units); @@ -124,20 +122,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn); - -extern void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index c2826d05505c..77933928107d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1713,7 +1713,6 @@ const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", [PCPU_FC_PAGE] = "page", - [PCPU_FC_LPAGE] = "lpage", }; enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; @@ -1729,10 +1728,6 @@ static int __init percpu_alloc_setup(char *str) #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK else if (!strcmp(str, "page")) pcpu_chosen_fc = PCPU_FC_PAGE; -#endif -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK - else if (!strcmp(str, "lpage")) - pcpu_chosen_fc = PCPU_FC_LPAGE; #endif else pr_warning("PERCPU: unknown allocator %s specified\n", str); @@ -1970,242 +1965,6 @@ out_free_ar: } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -struct pcpul_ent { - void *ptr; - void *map_addr; -}; - -static size_t pcpul_size; -static size_t pcpul_lpage_size; -static int pcpul_nr_lpages; -static struct pcpul_ent *pcpul_map; - -static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, - unsigned int *cpup) -{ - int group, cunit; - - for (group = 0, cunit = 0; group < ai->nr_groups; group++) { - const struct pcpu_group_info *gi = &ai->groups[group]; - - if (unit < cunit + gi->nr_units) { - if (cpup) - *cpup = gi->cpu_map[unit - cunit]; - return true; - } - cunit += gi->nr_units; - } - - return false; -} - -static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) -{ - int group, unit, i; - - for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { - const struct pcpu_group_info *gi = &ai->groups[group]; - - for (i = 0; i < gi->nr_units; i++) - if (gi->cpu_map[i] == cpu) - return unit + i; - } - BUG(); -} - -/** - * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @ai: pcpu_alloc_info - * @alloc_fn: function to allocate percpu lpage, always called with lpage_size - * @free_fn: function to free percpu memory, @size <= lpage_size - * @map_fn: function to map percpu lpage, always called with lpage_size - * - * This allocator uses large page to build and map the first chunk. - * Unlike other helpers, the caller should provide fully initialized - * @ai. This can be done using pcpu_build_alloc_info(). This two - * stage initialization is to allow arch code to evaluate the - * parameters before committing to it. - * - * Large pages are allocated as directed by @unit_map and other - * parameters and mapped to vmalloc space. Unused holes are returned - * to the page allocator. Note that these holes end up being actively - * mapped twice - once to the physical mapping and to the vmalloc area - * for the first percpu chunk. Depending on architecture, this might - * cause problem when changing page attributes of the returned area. - * These double mapped areas can be detected using - * pcpu_lpage_remapped(). - * - * RETURNS: - * 0 on success, -errno on failure. 
- */ -int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) -{ - static struct vm_struct vm; - const size_t lpage_size = ai->atom_size; - size_t chunk_size, map_size; - unsigned int cpu; - int i, j, unit, nr_units, rc; - - nr_units = 0; - for (i = 0; i < ai->nr_groups; i++) - nr_units += ai->groups[i].nr_units; - - chunk_size = ai->unit_size * nr_units; - BUG_ON(chunk_size % lpage_size); - - pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; - pcpul_lpage_size = lpage_size; - pcpul_nr_lpages = chunk_size / lpage_size; - - /* allocate pointer array and alloc large pages */ - map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]); - pcpul_map = alloc_bootmem(map_size); - - /* allocate all pages */ - for (i = 0; i < pcpul_nr_lpages; i++) { - size_t offset = i * lpage_size; - int first_unit = offset / ai->unit_size; - int last_unit = (offset + lpage_size - 1) / ai->unit_size; - void *ptr; - - /* find out which cpu is mapped to this unit */ - for (unit = first_unit; unit <= last_unit; unit++) - if (pcpul_unit_to_cpu(unit, ai, &cpu)) - goto found; - continue; - found: - ptr = alloc_fn(cpu, lpage_size, lpage_size); - if (!ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - pcpul_map[i].ptr = ptr; - } - - /* return unused holes */ - for (unit = 0; unit < nr_units; unit++) { - size_t start = unit * ai->unit_size; - size_t end = start + ai->unit_size; - size_t off, next; - - /* don't free used part of occupied unit */ - if (pcpul_unit_to_cpu(unit, ai, NULL)) - start += pcpul_size; - - /* unit can span more than one page, punch the holes */ - for (off = start; off < end; off = next) { - void *ptr = pcpul_map[off / lpage_size].ptr; - next = min(roundup(off + 1, lpage_size), end); - if (ptr) - free_fn(ptr + off % lpage_size, next - off); - } - } - - /* allocate address, map and copy */ - vm.flags = VM_ALLOC; - vm.size = chunk_size; - vm_area_register_early(&vm, ai->unit_size); - - for (i = 0; i < pcpul_nr_lpages; i++) { - if (!pcpul_map[i].ptr) - continue; - pcpul_map[i].map_addr = vm.addr + i * lpage_size; - map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr); - } - - for_each_possible_cpu(cpu) - memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, - __per_cpu_load, ai->static_size); - - /* we're ready, commit */ - pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", - vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, - ai->unit_size); - - rc = pcpu_setup_first_chunk(ai, vm.addr); - - /* - * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped - * lpages are pushed to the end and trimmed. - */ - for (i = 0; i < pcpul_nr_lpages - 1; i++) - for (j = i + 1; j < pcpul_nr_lpages; j++) { - struct pcpul_ent tmp; - - if (!pcpul_map[j].ptr) - continue; - if (pcpul_map[i].ptr && - pcpul_map[i].ptr < pcpul_map[j].ptr) - continue; - - tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) - pcpul_nr_lpages--; - - return rc; - -enomem: - for (i = 0; i < pcpul_nr_lpages; i++) - if (pcpul_map[i].ptr) - free_fn(pcpul_map[i].ptr, lpage_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. 
This is - * used by pageattr to detect VM aliases and break up the pcpu large - * page mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used large - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - unsigned long lpage_mask = pcpul_lpage_size - 1; - void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask); - unsigned long offset = (unsigned long)kaddr & lpage_mask; - int left = 0, right = pcpul_nr_lpages - 1; - int pos; - - /* pcpul in use at all? */ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < lpage_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > lpage_addr) - right = pos - 1; - else - return pcpul_map[pos].map_addr + offset; - } - - return NULL; -} -#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */ - /* * Generic percpu area setup. * -- cgit v1.2.3 From 31089c13bcb18d2cd2a3ddfbe3a28666346f237e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 14 Aug 2009 15:47:18 +0200 Subject: timekeeping: Introduce timekeeping_leap_insert Move the adjustment of xtime, wall_to_monotonic and the update of the vsyscall variables to the timekeeping code. Signed-off-by: John Stultz Signed-off-by: Martin Schwidefsky LKML-Reference: <20090814134807.609730216@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 + kernel/time/ntp.c | 7 ++----- kernel/time/timekeeping.c | 7 +++++++ 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index ea16c1a01d51..e7c844558884 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -147,6 +147,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); +extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7fc64375ff43..4800f933910e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) case TIME_OK: break; case TIME_INS: - xtime.tv_sec--; - wall_to_monotonic.tv_sec++; + timekeeping_leap_insert(-1); time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); @@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) res = HRTIMER_RESTART; break; case TIME_DEL: - xtime.tv_sec++; + timekeeping_leap_insert(1); time_tai--; - wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); @@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) time_state = TIME_OK; break; } - update_vsyscall(&xtime, clock); write_sequnlock(&xtime_lock); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 02c0b2c9c674..b8b70fb545fc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -58,6 +58,13 @@ void update_xtime_cache(u64 nsec) struct clocksource *clock; +/* must hold xtime_lock */ +void timekeeping_leap_insert(int leapsecond) +{ + xtime.tv_sec += leapsecond; + wall_to_monotonic.tv_sec -= leapsecond; + update_vsyscall(&xtime, clock); +} 
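Note that the new helper relies on its caller for locking: as the "must hold xtime_lock" comment says, ntp_leap_second() already runs under xtime_lock when it calls timekeeping_leap_insert(). A hypothetical caller outside ntp.c would have to take the lock itself, roughly:

	unsigned long flags;

	write_seqlock_irqsave(&xtime_lock, flags);
	timekeeping_leap_insert(-1);	/* TIME_INS: step wall clock back 1s */
	write_sequnlock_irqrestore(&xtime_lock, flags);

The sign convention matches the ntp.c hunk above: -1 inserts a leap second (23:59:60), +1 deletes one.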
#ifdef CONFIG_GENERIC_TIME /** -- cgit v1.2.3 From a0f7d48bfb95a4c5172a2756dbc4b82afc8e9ae4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:19 +0200 Subject: timekeeping: Remove clocksource inline functions The three inline functions clocksource_read, clocksource_enable and clocksource_disable are simple wrappers of an indirect call plus the copy from and to the mult_orig value. The functions are exclusively used by the timekeeping code which has intimate knowledge of the clocksource anyway. Therefore remove the inline functions. No functional change. Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134807.903108946@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 58 --------------------------------------------- kernel/time/timekeeping.c | 41 ++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1219be4fb42e..a1ef46f61c81 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -267,64 +267,6 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) return (u32)tmp; } -/** - * clocksource_read: - Access the clocksource's current cycle value - * @cs: pointer to clocksource being read - * - * Uses the clocksource to return the current cycle_t value - */ -static inline cycle_t clocksource_read(struct clocksource *cs) -{ - return cs->read(cs); -} - -/** - * clocksource_enable: - enable clocksource - * @cs: pointer to clocksource - * - * Enables the specified clocksource. The clocksource callback - * function should start up the hardware and setup mult and field - * members of struct clocksource to reflect hardware capabilities. - */ -static inline int clocksource_enable(struct clocksource *cs) -{ - int ret = 0; - - if (cs->enable) - ret = cs->enable(cs); - - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - cs->mult_orig = cs->mult; - - return ret; -} - -/** - * clocksource_disable: - disable clocksource - * @cs: pointer to clocksource - * - * Disables the specified clocksource. The clocksource callback - * function should power down the now unused hardware block to - * save power. - */ -static inline void clocksource_disable(struct clocksource *cs) -{ - /* - * Save mult_orig in mult so clocksource_enable() can - * restore the value regardless if ->enable() updates - * the value of mult or not. 
- */ - cs->mult = cs->mult_orig; - - if (cs->disable) - cs->disable(cs); -} - /** * cyc2ns - converts clocksource cycles to nanoseconds * @cs: Pointer to clocksource diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b8b70fb545fc..016a2591d719 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -79,7 +79,7 @@ static void clocksource_forward_now(void) cycle_t cycle_now, cycle_delta; s64 nsec; - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; @@ -114,7 +114,7 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -146,7 +146,7 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -186,7 +186,7 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -274,16 +274,29 @@ static void change_clocksource(void) clocksource_forward_now(); - if (clocksource_enable(new)) + if (new->enable && !new->enable(new)) return; + /* + * The frequency may have changed while the clocksource + * was disabled. If so the code in ->enable() must update + * the mult value to reflect the new frequency. Make sure + * mult_orig follows this change. + */ + new->mult_orig = new->mult; new->raw_time = clock->raw_time; old = clock; clock = new; - clocksource_disable(old); + /* + * Save mult_orig in mult so that the value can be restored + * regardless if ->enable() updates the value of mult or not. 
+ */ + old->mult = old->mult_orig; + if (old->disable) + old->disable(old); clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; clock->xtime_nsec = 0; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -373,7 +386,7 @@ void getrawmonotonic(struct timespec *ts) seq = read_seqbegin(&xtime_lock); /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -435,9 +448,12 @@ void __init timekeeping_init(void) ntp_init(); clock = clocksource_get_next(); - clocksource_enable(clock); + if (clock->enable) + clock->enable(clock); + /* set mult_orig on enable */ + clock->mult_orig = clock->mult; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; @@ -477,8 +493,7 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -630,7 +645,7 @@ void update_wall_time(void) return; #ifdef CONFIG_GENERIC_TIME - offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; + offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else offset = clock->cycle_interval; #endif -- cgit v1.2.3 From f1b82746c1e93daf24e1ab9bfbd39bcdb2e7018b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:21 +0200 Subject: clocksource: Cleanup clocksource selection If a non high-resolution clocksource is first set as override clock and then registered it becomes active even if the system is in one-shot mode. Move the override check from sysfs_override_clocksource to the clocksource selection. That fixes the bug and simplifies the code. The check in clocksource_register for double registration of the same clocksource is removed without replacement. To find the initial clocksource a new weak function in jiffies.c is defined that returns the jiffies clocksource. The architecture code can then override the weak function with a more suitable clocksource, e.g. the TOD clock on s390. 
[ tglx: Folded in a fix from John Stultz ] Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.388024160@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 4 ++ include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 134 +++++++++++++++++--------------------------- kernel/time/jiffies.c | 6 +- kernel/time/timekeeping.c | 4 +- 5 files changed, 64 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c47c81..afefe514df0f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -205,6 +205,10 @@ static struct clocksource clocksource_tod = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +struct clocksource * __init clocksource_default_clock(void) +{ + return &clocksource_tod; +} void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a1ef46f61c81..f263b3abf46e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -322,6 +323,7 @@ extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); +extern struct clocksource * __init __weak clocksource_default_clock(void); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb811251..e91662e87cde 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -21,7 +21,6 @@ * * TODO WishList: * o Allow clocksource drivers to be unregistered - * o get rid of clocksource_jiffies extern */ #include @@ -107,12 +106,9 @@ u64 timecounter_cyc2time(struct timecounter *tc, } EXPORT_SYMBOL(timecounter_cyc2time); -/* XXX - Would like a better way for initializing curr_clocksource */ -extern struct clocksource clocksource_jiffies; - /*[Clocksource internal variables]--------- * curr_clocksource: - * currently selected clocksource. Initialized to clocksource_jiffies. + * currently selected clocksource. * next_clocksource: * pending next selected clocksource. * clocksource_list: @@ -123,9 +119,8 @@ extern struct clocksource clocksource_jiffies; * override_name: * Name of the user-specified clocksource. */ -static struct clocksource *curr_clocksource = &clocksource_jiffies; +static struct clocksource *curr_clocksource; static struct clocksource *next_clocksource; -static struct clocksource *clocksource_override; static LIST_HEAD(clocksource_list); static DEFINE_SPINLOCK(clocksource_lock); static char override_name[32]; @@ -320,6 +315,7 @@ void clocksource_touch_watchdog(void) clocksource_resume_watchdog(); } +#ifdef CONFIG_GENERIC_TIME /** * clocksource_get_next - Returns the selected clocksource * @@ -339,56 +335,65 @@ struct clocksource *clocksource_get_next(void) } /** - * select_clocksource - Selects the best registered clocksource. + * clocksource_select - Select the best clocksource available * * Private function. Must hold clocksource_lock when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. 
*/ -static struct clocksource *select_clocksource(void) +static void clocksource_select(void) { - struct clocksource *next; + struct clocksource *best, *cs; if (list_empty(&clocksource_list)) - return NULL; + return; + /* First clocksource on the list has the best rating. */ + best = list_first_entry(&clocksource_list, struct clocksource, list); + /* Check for the override clocksource. */ + list_for_each_entry(cs, &clocksource_list, list) { + if (strcmp(cs->name, override_name) != 0) + continue; + /* + * Check to make sure we don't switch to a non-highres + * capable clocksource if the tick code is in oneshot + * mode (highres or nohz) + */ + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && + tick_oneshot_mode_active()) { + /* Override clocksource cannot be used. */ + printk(KERN_WARNING "Override clocksource %s is not " + "HRT compatible. Cannot switch while in " + "HRT/NOHZ mode\n", cs->name); + override_name[0] = 0; + } else + /* Override clocksource can be used. */ + best = cs; + break; + } + if (curr_clocksource != best) + next_clocksource = best; +} - if (clocksource_override) - next = clocksource_override; - else - next = list_entry(clocksource_list.next, struct clocksource, - list); +#else /* CONFIG_GENERIC_TIME */ - if (next == curr_clocksource) - return NULL; +static void clocksource_select(void) { } - return next; -} +#endif /* * Enqueue the clocksource sorted by rating */ -static int clocksource_enqueue(struct clocksource *c) +static void clocksource_enqueue(struct clocksource *cs) { - struct list_head *tmp, *entry = &clocksource_list; - - list_for_each(tmp, &clocksource_list) { - struct clocksource *cs; + struct list_head *entry = &clocksource_list; + struct clocksource *tmp; - cs = list_entry(tmp, struct clocksource, list); - if (cs == c) - return -EBUSY; + list_for_each_entry(tmp, &clocksource_list, list) /* Keep track of the place, where to insert */ - if (cs->rating >= c->rating) - entry = tmp; - } - list_add(&c->list, entry); - - if (strlen(c->name) == strlen(override_name) && - !strcmp(c->name, override_name)) - clocksource_override = c; - - return 0; + if (tmp->rating >= cs->rating) + entry = &tmp->list; + list_add(&cs->list, entry); } /** @@ -397,19 +402,16 @@ static int clocksource_enqueue(struct clocksource *c) * * Returns -EBUSY if registration fails, zero otherwise. 
*/ -int clocksource_register(struct clocksource *c) +int clocksource_register(struct clocksource *cs) { unsigned long flags; - int ret; spin_lock_irqsave(&clocksource_lock, flags); - ret = clocksource_enqueue(c); - if (!ret) - next_clocksource = select_clocksource(); + clocksource_enqueue(cs); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); - if (!ret) - clocksource_check_watchdog(c); - return ret; + clocksource_check_watchdog(cs); + return 0; } EXPORT_SYMBOL(clocksource_register); @@ -425,7 +427,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -438,9 +440,7 @@ void clocksource_unregister(struct clocksource *cs) spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); - if (clocksource_override == cs) - clocksource_override = NULL; - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -478,9 +478,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t count) { - struct clocksource *ovr = NULL; size_t ret = count; - int len; /* strings from sysfs write are not 0 terminated! */ if (count >= sizeof(override_name)) @@ -495,37 +493,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; - - len = strlen(override_name); - if (len) { - struct clocksource *cs; - - ovr = clocksource_override; - /* try to select it: */ - list_for_each_entry(cs, &clocksource_list, list) { - if (strlen(cs->name) == len && - !strcmp(cs->name, override_name)) - ovr = cs; - } - } - - /* - * Check to make sure we don't switch to a non-highres capable - * clocksource if the tick code is in oneshot mode (highres or nohz) - */ - if (tick_oneshot_mode_active() && ovr && - !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { - printk(KERN_WARNING "%s clocksource is not HRT compatible. 
" - "Cannot switch while in HRT/NOHZ mode\n", ovr->name); - ovr = NULL; - override_name[0] = 0; - } - - /* Reselect, when the override name has changed */ - if (ovr != clocksource_override) { - clocksource_override = ovr; - next_clocksource = select_clocksource(); - } + clocksource_select(); spin_unlock_irq(&clocksource_lock); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index c3f6c30816e3..5404a8456909 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = { .read = jiffies_read, .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ - .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT, .shift = JIFFIES_SHIFT, }; @@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void) } core_initcall(init_jiffies_clocksource); + +struct clocksource * __init __weak clocksource_default_clock(void) +{ + return &clocksource_jiffies; +} diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b5673016089f..325a9b63265a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -269,7 +269,7 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (clock == new) + if (!new || clock == new) return; clocksource_forward_now(); @@ -446,7 +446,7 @@ void __init timekeeping_init(void) ntp_init(); - clock = clocksource_get_next(); + clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); /* set mult_orig on enable */ -- cgit v1.2.3 From c55c87c892c1875deace0c8fc28787335277fdf2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:25 +0200 Subject: clocksource: Move watchdog downgrade to a work queue thread Move the downgrade of an unstable clocksource from the timer interrupt context into the process context of a work queue thread. This is needed to be able to do the clocksource switch with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.354926067@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 56 +++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f263b3abf46e..19ad43af62d0 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -213,6 +213,7 @@ extern struct clocksource *clock; /* current clocksource */ #define CLOCK_SOURCE_WATCHDOG 0x10 #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 +#define CLOCK_SOURCE_UNSTABLE 0x40 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? 
((1ULL<<(bits))-1) : -1) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 56aaa749645d..f1508019bfb4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -143,10 +143,13 @@ fs_initcall(clocksource_done_booting); static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; static struct timer_list watchdog_timer; +static struct work_struct watchdog_work; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; static int watchdog_running; +static void clocksource_watchdog_work(struct work_struct *work); + /* * Interval: 0.5sec Threshold: 0.0625s */ @@ -158,15 +161,16 @@ static void clocksource_unstable(struct clocksource *cs, int64_t delta) printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); - clocksource_change_rating(cs, 0); - list_del(&cs->wd_list); + cs->flags |= CLOCK_SOURCE_UNSTABLE; + schedule_work(&watchdog_work); } static void clocksource_watchdog(unsigned long data) { - struct clocksource *cs, *tmp; + struct clocksource *cs; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; + int next_cpu; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -176,7 +180,12 @@ static void clocksource_watchdog(unsigned long data) wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; - list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { + list_for_each_entry(cs, &watchdog_list, wd_list) { + + /* Clocksource already marked unstable? */ + if (cs->flags & CLOCK_SOURCE_UNSTABLE) + continue; + csnow = cs->read(cs); /* Clocksource initialized ? */ @@ -207,19 +216,15 @@ static void clocksource_watchdog(unsigned long data) } } - if (!list_empty(&watchdog_list)) { - /* - * Cycle through CPUs to check if the CPUs stay - * synchronized to each other. - */ - int next_cpu = cpumask_next(raw_smp_processor_id(), - cpu_online_mask); - - if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); - } + /* + * Cycle through CPUs to check if the CPUs stay synchronized + * to each other. + */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); out: spin_unlock(&watchdog_lock); } @@ -228,6 +233,7 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; + INIT_WORK(&watchdog_work, clocksource_watchdog_work); init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; watchdog_last = watchdog->read(watchdog); @@ -313,6 +319,22 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } +static void clocksource_watchdog_work(struct work_struct *work) +{ + struct clocksource *cs, *tmp; + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) + if (cs->flags & CLOCK_SOURCE_UNSTABLE) { + list_del_init(&cs->wd_list); + clocksource_change_rating(cs, 0); + } + /* Check if the watchdog timer needs to be stopped. 
*/ + clocksource_stop_watchdog(); + spin_unlock(&watchdog_lock); +} + #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ static void clocksource_enqueue_watchdog(struct clocksource *cs) -- cgit v1.2.3 From 155ec60226ae0ae2aadaa57c951a58a359331030 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:26 +0200 Subject: timekeeping: Introduce struct timekeeper Add struct timekeeper to keep the internal values timekeeping.c needs in regard to the currently selected clock source. This moves the timekeeping intervals, xtime_nsec and the ntp error value from struct clocksource to struct timekeeper. The raw_time is removed from the clocksource as well. It gets treated like xtime as a global variable. Eventually xtime raw_time should be moved to struct timekeeper. [ tglx: minor cleanup ] Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.613209842@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 1 - include/linux/clocksource.h | 54 +--------- kernel/time/clocksource.c | 6 +- kernel/time/timekeeping.c | 235 +++++++++++++++++++++++++++++--------------- 4 files changed, 164 insertions(+), 132 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index afefe514df0f..e76c2e7a8b9a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -280,7 +280,6 @@ void __init time_init(void) now = get_clock(); tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); clocksource_tod.cycle_last = now; - clocksource_tod.raw_time = xtime; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 19ad43af62d0..e12e3095e2fb 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -155,8 +155,6 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @flags: flags describing special properties * @vread: vsyscall based read * @resume: resume function for the clocksource, if necessary - * @cycle_interval: Used internally by timekeeping core, please ignore. - * @xtime_interval: Used internally by timekeeping core, please ignore. */ struct clocksource { /* @@ -182,19 +180,12 @@ struct clocksource { #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) #endif - /* timekeeping specific data, ignore */ - cycle_t cycle_interval; - u64 xtime_interval; - u32 raw_interval; /* * Second part is written at each timer interrupt * Keep it in a different cache line to dirty no * more than one cache line. */ cycle_t cycle_last ____cacheline_aligned_in_smp; - u64 xtime_nsec; - s64 error; - struct timespec raw_time; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ @@ -203,8 +194,6 @@ struct clocksource { #endif }; -extern struct clocksource *clock; /* current clocksource */ - /* * Clock source flags bits:: */ @@ -270,50 +259,15 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) } /** - * cyc2ns - converts clocksource cycles to nanoseconds - * @cs: Pointer to clocksource - * @cycles: Cycles + * clocksource_cyc2ns - converts clocksource cycles to nanoseconds * - * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds. + * Converts cycles to nanoseconds, using the given mult and shift. 
* * XXX - This could use some mult_lxl_ll() asm optimization */ -static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) +static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) { - u64 ret = (u64)cycles; - ret = (ret * cs->mult) >> cs->shift; - return ret; -} - -/** - * clocksource_calculate_interval - Calculates a clocksource interval struct - * - * @c: Pointer to clocksource. - * @length_nsec: Desired interval length in nanoseconds. - * - * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment - * pair and interval request. - * - * Unless you're the timekeeping code, you should not be using this! - */ -static inline void clocksource_calculate_interval(struct clocksource *c, - unsigned long length_nsec) -{ - u64 tmp; - - /* Do the ns -> cycle conversion first, using original mult */ - tmp = length_nsec; - tmp <<= c->shift; - tmp += c->mult_orig/2; - do_div(tmp, c->mult_orig); - - c->cycle_interval = (cycle_t)tmp; - if (c->cycle_interval == 0) - c->cycle_interval = 1; - - /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ - c->xtime_interval = (u64)c->cycle_interval * c->mult; - c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; + return ((u64) cycles * mult) >> shift; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f1508019bfb4..f18c9a6bdcf4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -177,7 +177,8 @@ static void clocksource_watchdog(unsigned long data) goto out; wdnow = watchdog->read(watchdog); - wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); + wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, + watchdog->mult, watchdog->shift); watchdog_last = wdnow; list_for_each_entry(cs, &watchdog_list, wd_list) { @@ -196,7 +197,8 @@ static void clocksource_watchdog(unsigned long data) } /* Check the deviation from the watchdog clocksource. */ - cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); + cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + cs->mask, cs->mult, cs->shift); cs->wd_last = csnow; if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 325a9b63265a..7af45cbf6b13 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -19,6 +19,65 @@ #include #include +/* Structure holding internal timekeeping values. */ +struct timekeeper { + /* Current clocksource used for timekeeping. */ + struct clocksource *clock; + + /* Number of clock cycles in one NTP interval. */ + cycle_t cycle_interval; + /* Number of clock shifted nano seconds in one NTP interval. */ + u64 xtime_interval; + /* Raw nano seconds accumulated per NTP interval. */ + u32 raw_interval; + + /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ + u64 xtime_nsec; + /* Difference between accumulated time and NTP time in ntp + * shifted nano seconds. */ + s64 ntp_error; +}; + +struct timekeeper timekeeper; + +/** + * timekeeper_setup_internals - Set up internals to use clocksource clock. + * + * @clock: Pointer to clocksource. + * + * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment + * pair and interval request. + * + * Unless you're the timekeeping code, you should not be using this! 
+ */ +static void timekeeper_setup_internals(struct clocksource *clock) +{ + cycle_t interval; + u64 tmp; + + timekeeper.clock = clock; + clock->cycle_last = clock->read(clock); + + /* Do the ns -> cycle conversion first, using original mult */ + tmp = NTP_INTERVAL_LENGTH; + tmp <<= clock->shift; + tmp += clock->mult_orig/2; + do_div(tmp, clock->mult_orig); + if (tmp == 0) + tmp = 1; + + interval = (cycle_t) tmp; + timekeeper.cycle_interval = interval; + + /* Go back from cycles -> shifted ns */ + timekeeper.xtime_interval = (u64) interval * clock->mult; + timekeeper.raw_interval = + ((u64) interval * clock->mult_orig) >> clock->shift; + + timekeeper.xtime_nsec = 0; + + timekeeper.ntp_error = 0; +} /* * This read-write spinlock protects us from races in SMP while @@ -46,6 +105,11 @@ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static unsigned long total_sleep_time; /* seconds */ +/* + * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. + */ +struct timespec raw_time; + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -56,42 +120,42 @@ void update_xtime_cache(u64 nsec) timespec_add_ns(&xtime_cache, nsec); } -struct clocksource *clock; - /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { xtime.tv_sec += leapsecond; wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } #ifdef CONFIG_GENERIC_TIME /** - * clocksource_forward_now - update clock to the current time + * timekeeping_forward_now - update clock to the current time * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. 
*/ -static void clocksource_forward_now(void) +static void timekeeping_forward_now(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; s64 nsec; + clock = timekeeper.clock; cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = cyc2ns(clock, cycle_delta); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; - clock->raw_time.tv_nsec += nsec; + nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + timespec_add_ns(&raw_time, nsec); } /** @@ -103,6 +167,7 @@ static void clocksource_forward_now(void) void getnstimeofday(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned long seq; s64 nsecs; @@ -114,13 +179,15 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -135,6 +202,7 @@ EXPORT_SYMBOL(getnstimeofday); ktime_t ktime_get(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned int seq; s64 secs, nsecs; @@ -146,13 +214,15 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += cyc2ns(clock, cycle_delta); + nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -174,6 +244,7 @@ EXPORT_SYMBOL_GPL(ktime_get); void ktime_get_ts(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; struct timespec tomono; unsigned int seq; s64 nsecs; @@ -186,13 +257,15 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); @@ -233,7 +306,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; @@ -243,10 +316,10 @@ int do_settimeofday(struct timespec *tv) update_xtime_cache(0); - clock->error = 0; + timekeeper.ntp_error = 0; ntp_clear(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -269,10 +342,10 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (!new || clock == new) + if (!new || timekeeper.clock == new) return; - clocksource_forward_now(); + timekeeping_forward_now(); if (new->enable && 
!new->enable(new)) return; @@ -284,9 +357,9 @@ static void change_clocksource(void) */ new->mult_orig = new->mult; - new->raw_time = clock->raw_time; - old = clock; - clock = new; + old = timekeeper.clock; + timekeeper_setup_internals(new); + /* * Save mult_orig in mult so that the value can be restored * regardless if ->enable() updates the value of mult or not. @@ -295,22 +368,10 @@ static void change_clocksource(void) if (old->disable) old->disable(old); - clock->cycle_last = clock->read(clock); - clock->error = 0; - clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - tick_clock_notify(); - - /* - * We're holding xtime lock and waking up klogd would deadlock - * us on enqueue. So no printing! - printk(KERN_INFO "Time: %s clocksource has been installed.\n", - clock->name); - */ } #else /* GENERIC_TIME */ -static inline void clocksource_forward_now(void) { } +static inline void timekeeping_forward_now(void) { } static inline void change_clocksource(void) { } /** @@ -380,20 +441,23 @@ void getrawmonotonic(struct timespec *ts) unsigned long seq; s64 nsecs; cycle_t cycle_now, cycle_delta; + struct clocksource *clock; do { seq = read_seqbegin(&xtime_lock); /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + clock->shift); - *ts = clock->raw_time; + *ts = raw_time; } while (read_seqretry(&xtime_lock, seq)); @@ -413,7 +477,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqbegin(&xtime_lock); - ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqretry(&xtime_lock, seq)); @@ -439,6 +503,7 @@ unsigned long __attribute__((weak)) read_persistent_clock(void) */ void __init timekeeping_init(void) { + struct clocksource *clock; unsigned long flags; unsigned long sec = read_persistent_clock(); @@ -451,11 +516,13 @@ void __init timekeeping_init(void) clock->enable(clock); /* set mult_orig on enable */ clock->mult_orig = clock->mult; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clock->read(clock); + + timekeeper_setup_internals(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; + raw_time.tv_sec = 0; + raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); @@ -492,8 +559,8 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = clock->read(clock); - clock->error = 0; + timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); + timekeeper.ntp_error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -514,7 +581,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) timekeeping_suspend_time = read_persistent_clock(); write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); timekeeping_suspended = 1; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -549,7 +616,7 @@ device_initcall(timekeeping_init_device); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. 
*/ -static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, +static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, s64 *offset) { s64 tick_error, i; @@ -565,7 +632,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ - error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -574,8 +641,9 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); - tick_error -= clock->xtime_interval >> 1; + tick_error = tick_length >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift + 1); + tick_error -= timekeeper.xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; /* Finally calculate the adjustment shift value. */ @@ -600,18 +668,19 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void clocksource_adjust(s64 offset) +static void timekeeping_adjust(s64 offset) { - s64 error, interval = clock->cycle_interval; + s64 error, interval = timekeeper.cycle_interval; int adj; - error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); + error = timekeeper.ntp_error >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift - 1); if (error > interval) { error >>= 2; if (likely(error <= interval)) adj = 1; else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else if (error < -interval) { error >>= 2; if (likely(error >= -interval)) { @@ -619,15 +688,15 @@ static void clocksource_adjust(s64 offset) interval = -interval; offset = -offset; } else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else return; - clock->mult += adj; - clock->xtime_interval += interval; - clock->xtime_nsec -= offset; - clock->error -= (interval - offset) << - (NTP_SCALE_SHIFT - clock->shift); + timekeeper.clock->mult += adj; + timekeeper.xtime_interval += interval; + timekeeper.xtime_nsec -= offset; + timekeeper.ntp_error -= (interval - offset) << + (NTP_SCALE_SHIFT - timekeeper.clock->shift); } /** @@ -637,53 +706,59 @@ static void clocksource_adjust(s64 offset) */ void update_wall_time(void) { + struct clocksource *clock; cycle_t offset; + s64 nsecs; /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) return; + clock = timekeeper.clock; #ifdef CONFIG_GENERIC_TIME offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else - offset = clock->cycle_interval; + offset = timekeeper.cycle_interval; #endif - clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; + timekeeper.xtime_nsec = (s64)xtime.tv_nsec << clock->shift; /* normally this loop will run just once, however in the * case of lost or late ticks, it will accumulate correctly. 
*/ - while (offset >= clock->cycle_interval) { + while (offset >= timekeeper.cycle_interval) { + u64 nsecps = (u64)NSEC_PER_SEC << clock->shift; + /* accumulate one interval */ - offset -= clock->cycle_interval; - clock->cycle_last += clock->cycle_interval; + offset -= timekeeper.cycle_interval; + clock->cycle_last += timekeeper.cycle_interval; - clock->xtime_nsec += clock->xtime_interval; - if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { - clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; + timekeeper.xtime_nsec += timekeeper.xtime_interval; + if (timekeeper.xtime_nsec >= nsecps) { + timekeeper.xtime_nsec -= nsecps; xtime.tv_sec++; second_overflow(); } - clock->raw_time.tv_nsec += clock->raw_interval; - if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { - clock->raw_time.tv_nsec -= NSEC_PER_SEC; - clock->raw_time.tv_sec++; + raw_time.tv_nsec += timekeeper.raw_interval; + if (raw_time.tv_nsec >= NSEC_PER_SEC) { + raw_time.tv_nsec -= NSEC_PER_SEC; + raw_time.tv_sec++; } /* accumulate error between NTP and clock interval */ - clock->error += tick_length; - clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); + timekeeper.ntp_error += tick_length; + timekeeper.ntp_error -= timekeeper.xtime_interval << + (NTP_SCALE_SHIFT - clock->shift); } /* correct the clock when NTP error is too big */ - clocksource_adjust(offset); + timekeeping_adjust(offset); /* * Since in the loop above, we accumulate any amount of time * in xtime_nsec over a second into xtime.tv_sec, its possible for * xtime_nsec to be fairly small after the loop. Further, if we're - * slightly speeding the clocksource up in clocksource_adjust(), + * slightly speeding the clocksource up in timekeeping_adjust(), * its possible the required corrective factor to xtime_nsec could * cause it to underflow. * @@ -695,24 +770,26 @@ void update_wall_time(void) * We'll correct this error next time through this function, when * xtime_nsec is not as small. */ - if (unlikely((s64)clock->xtime_nsec < 0)) { - s64 neg = -(s64)clock->xtime_nsec; - clock->xtime_nsec = 0; - clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); + if (unlikely((s64)timekeeper.xtime_nsec < 0)) { + s64 neg = -(s64)timekeeper.xtime_nsec; + timekeeper.xtime_nsec = 0; + timekeeper.ntp_error += neg << (NTP_SCALE_SHIFT - clock->shift); } /* store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ - xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; - clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; - clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); + xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> clock->shift) + 1; + timekeeper.xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; + timekeeper.ntp_error += timekeeper.xtime_nsec << + (NTP_SCALE_SHIFT - clock->shift); - update_xtime_cache(cyc2ns(clock, offset)); + nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ change_clocksource(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } /** -- cgit v1.2.3 From 0a54419836254a27baecd9037103171bcbabaf67 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:28 +0200 Subject: timekeeping: Move NTP adjusted clock multiplier to struct timekeeper The clocksource structure has two multipliers, the unmodified multiplier clock->mult_orig and the NTP corrected multiplier clock->mult. 
The NTP multiplier is misplaced in the struct clocksource, this is private information of the timekeeping code. Add the mult field to the struct timekeeper to contain the NTP corrected value, keep the unmodified multiplier in clock->mult and remove clock->mult_orig. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.149047645@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/arm/plat-omap/common.c | 7 ++---- include/linux/clocksource.h | 4 +--- kernel/time/timekeeping.c | 53 ++++++++++++++++++++------------------------- 3 files changed, 27 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index ebcf006406f9..95587b6c0259 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -253,11 +253,8 @@ static struct clocksource clocksource_32k = { */ unsigned long long sched_clock(void) { - unsigned long long ret; - - ret = (unsigned long long)clocksource_32k.read(&clocksource_32k); - ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift; - return ret; + return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), + clocksource_32k.mult, clocksource_32k.shift); } static int __init omap_init_clocksource_32k(void) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e12e3095e2fb..e34015effeb6 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -149,8 +149,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @disable: optional function to disable the clocksource * @mask: bitmask for two's complement * subtraction of non 64 bit counters - * @mult: cycle to nanosecond multiplier (adjusted by NTP) - * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP) + * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @flags: flags describing special properties * @vread: vsyscall based read @@ -168,7 +167,6 @@ struct clocksource { void (*disable)(struct clocksource *cs); cycle_t mask; u32 mult; - u32 mult_orig; u32 shift; unsigned long flags; cycle_t (*vread)(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dfdab1cefe1e..f4056f6c2632 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -41,6 +41,8 @@ struct timekeeper { /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ int ntp_error_shift; + /* NTP adjusted clock multiplier */ + u32 mult; }; struct timekeeper timekeeper; @@ -66,8 +68,8 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; - tmp += clock->mult_orig/2; - do_div(tmp, clock->mult_orig); + tmp += clock->mult/2; + do_div(tmp, clock->mult); if (tmp == 0) tmp = 1; @@ -77,13 +79,20 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Go back from cycles -> shifted ns */ timekeeper.xtime_interval = (u64) interval * clock->mult; timekeeper.raw_interval = - ((u64) interval * clock->mult_orig) >> clock->shift; + ((u64) interval * clock->mult) >> clock->shift; timekeeper.xtime_nsec = 0; timekeeper.shift = clock->shift; timekeeper.ntp_error = 0; timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + + /* + * The timekeeper keeps its own mult values for the currently + * active clocksource. These values will be adjusted via NTP + * to counteract clock drifting.
+ */ + timekeeper.mult = clock->mult; } /* @@ -154,14 +163,15 @@ static void timekeeping_forward_now(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); timespec_add_ns(&raw_time, nsec); } @@ -193,8 +203,8 @@ void getnstimeofday(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -228,8 +238,8 @@ ktime_t ktime_get(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs += clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -271,8 +281,8 @@ void ktime_get_ts(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); @@ -356,22 +366,10 @@ static void change_clocksource(void) if (new->enable && !new->enable(new)) return; - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - new->mult_orig = new->mult; old = timekeeper.clock; timekeeper_setup_internals(new); - /* - * Save mult_orig in mult so that the value can be restored - * regardless if ->enable() updates the value of mult or not. 
- */ - old->mult = old->mult_orig; if (old->disable) old->disable(old); @@ -461,7 +459,7 @@ void getrawmonotonic(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); *ts = raw_time; @@ -521,9 +519,6 @@ void __init timekeeping_init(void) clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - /* set mult_orig on enable */ - clock->mult_orig = clock->mult; - timekeeper_setup_internals(clock); xtime.tv_sec = sec; @@ -697,7 +692,7 @@ static void timekeeping_adjust(s64 offset) } else return; - timekeeper.clock->mult += adj; + timekeeper.mult += adj; timekeeper.xtime_interval += interval; timekeeper.xtime_nsec -= offset; timekeeper.ntp_error -= (interval - offset) << @@ -789,7 +784,7 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; - nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ -- cgit v1.2.3 From 75c5158f70c065b9704b924503d96e8297838f79 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:30 +0200 Subject: timekeeping: Update clocksource with stop_machine update_wall_time calls change_clocksource HZ times per second to check if a new clock source is available. In close to 100% of all calls there is no new clock. Replace the tick based check by an update done with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.711836357@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 112 +++++++++++++++++--------------------------- kernel/time/timekeeping.c | 41 ++++++++++------ 3 files changed, 72 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e34015effeb6..9ea40ff26f0e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,4 +291,6 @@ static inline void update_vsyscall_tz(void) } #endif +extern void timekeeping_notify(struct clocksource *clock); + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f18c9a6bdcf4..a1657b5fdeb9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -109,35 +109,17 @@ EXPORT_SYMBOL(timecounter_cyc2time); /*[Clocksource internal variables]--------- * curr_clocksource: * currently selected clocksource. - * next_clocksource: - * pending next selected clocksource. * clocksource_list: * linked list with the registered clocksources - * clocksource_lock: - * protects manipulations to curr_clocksource and next_clocksource - * and the clocksource_list + * clocksource_mutex: + * protects manipulations to curr_clocksource and the clocksource_list * override_name: * Name of the user-specified clocksource. 
*/ static struct clocksource *curr_clocksource; -static struct clocksource *next_clocksource; static LIST_HEAD(clocksource_list); -static DEFINE_SPINLOCK(clocksource_lock); +static DEFINE_MUTEX(clocksource_mutex); static char override_name[32]; -static int finished_booting; - -/* clocksource_done_booting - Called near the end of core bootup - * - * Hack to avoid lots of clocksource churn at boot time. - * We use fs_initcall because we want this to start before - * device_initcall but after subsys_initcall. - */ -static int __init clocksource_done_booting(void) -{ - finished_booting = 1; - return 0; -} -fs_initcall(clocksource_done_booting); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static LIST_HEAD(watchdog_list); @@ -356,18 +338,16 @@ static inline void clocksource_resume_watchdog(void) { } void clocksource_resume(void) { struct clocksource *cs; - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); - list_for_each_entry(cs, &clocksource_list, list) { + list_for_each_entry(cs, &clocksource_list, list) if (cs->resume) cs->resume(); - } clocksource_resume_watchdog(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } /** @@ -383,28 +363,13 @@ void clocksource_touch_watchdog(void) } #ifdef CONFIG_GENERIC_TIME -/** - * clocksource_get_next - Returns the selected clocksource - * - */ -struct clocksource *clocksource_get_next(void) -{ - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); - if (next_clocksource && finished_booting) { - curr_clocksource = next_clocksource; - next_clocksource = NULL; - } - spin_unlock_irqrestore(&clocksource_lock, flags); - - return curr_clocksource; -} +static int finished_booting; /** * clocksource_select - Select the best clocksource available * - * Private function. Must hold clocksource_lock when called. + * Private function. Must hold clocksource_mutex when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. @@ -413,7 +378,7 @@ static void clocksource_select(void) { struct clocksource *best, *cs; - if (list_empty(&clocksource_list)) + if (!finished_booting || list_empty(&clocksource_list)) return; /* First clocksource on the list has the best rating. */ best = list_first_entry(&clocksource_list, struct clocksource, list); @@ -438,13 +403,31 @@ static void clocksource_select(void) best = cs; break; } - if (curr_clocksource != best) - next_clocksource = best; + if (curr_clocksource != best) { + printk(KERN_INFO "Switching to clocksource %s\n", best->name); + curr_clocksource = best; + timekeeping_notify(curr_clocksource); + } } +/* + * clocksource_done_booting - Called near the end of core bootup + * + * Hack to avoid lots of clocksource churn at boot time. + * We use fs_initcall because we want this to start before + * device_initcall but after subsys_initcall. 
+ */ +static int __init clocksource_done_booting(void) +{ + finished_booting = 1; + clocksource_select(); + return 0; +} +fs_initcall(clocksource_done_booting); + #else /* CONFIG_GENERIC_TIME */ -static void clocksource_select(void) { } +static inline void clocksource_select(void) { } #endif @@ -471,13 +454,11 @@ static void clocksource_enqueue(struct clocksource *cs) */ int clocksource_register(struct clocksource *cs) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); clocksource_enqueue_watchdog(cs); + mutex_unlock(&clocksource_mutex); return 0; } EXPORT_SYMBOL(clocksource_register); @@ -487,14 +468,12 @@ EXPORT_SYMBOL(clocksource_register); */ void clocksource_change_rating(struct clocksource *cs, int rating) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_change_rating); @@ -503,13 +482,11 @@ EXPORT_SYMBOL(clocksource_change_rating); */ void clocksource_unregister(struct clocksource *cs) { - unsigned long flags; - + mutex_lock(&clocksource_mutex); clocksource_dequeue_watchdog(cs); - spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_unregister); @@ -527,9 +504,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, { ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return count; } @@ -557,14 +534,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (buf[count-1] == '\n') count--; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; clocksource_select(); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return ret; } @@ -584,7 +561,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, struct clocksource *src; ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); list_for_each_entry(src, &clocksource_list, list) { /* * Don't show non-HRES clocksource if the tick code is @@ -596,7 +573,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "%s ", src->name); } - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); count += snprintf(buf + count, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); @@ -651,11 +628,10 @@ device_initcall(init_clocksource_sysfs); */ static int __init boot_override_clocksource(char* str) { - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); if (str) strlcpy(override_name, str, sizeof(override_name)); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); return 1; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 27ae01b596b7..41579e7fcf9d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -18,6 +18,7 @@ #include #include #include 
+#include <linux/stop_machine.h> /* Structure holding internal timekeeping values. */ struct timekeeper { @@ -179,6 +180,7 @@ void timekeeping_leap_insert(int leapsecond) } #ifdef CONFIG_GENERIC_TIME + /** * timekeeping_forward_now - update clock to the current time * @@ -351,31 +353,40 @@ EXPORT_SYMBOL(do_settimeofday); * * Accumulates current time interval and initializes new clocksource */ -static void change_clocksource(void) +static int change_clocksource(void *data) { struct clocksource *new, *old; - new = clocksource_get_next(); - - if (!new || timekeeper.clock == new) - return; + new = (struct clocksource *) data; timekeeping_forward_now(); + if (!new->enable || new->enable(new) == 0) { + old = timekeeper.clock; + timekeeper_setup_internals(new); + if (old->disable) + old->disable(old); + } + return 0; +} - if (new->enable && !new->enable(new)) +/** + * timekeeping_notify - Install a new clock source + * @clock: pointer to the clock source + * + * This function is called from clocksource.c after a new, better clock + * source has been registered. The caller holds the clocksource_mutex. + */ +void timekeeping_notify(struct clocksource *clock) +{ + if (timekeeper.clock == clock) return; - - old = timekeeper.clock; - timekeeper_setup_internals(new); - - if (old->disable) - old->disable(old); - + stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); } + #else /* GENERIC_TIME */ + static inline void timekeeping_forward_now(void) { } -static inline void change_clocksource(void) { } /** * ktime_get - get the monotonic time in ktime_t format @@ -416,6 +427,7 @@ void ktime_get_ts(struct timespec *ts) ts->tv_nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); + #endif /* !GENERIC_TIME */ /** @@ -773,7 +785,6 @@ void update_wall_time(void) update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ - change_clocksource(); update_vsyscall(&xtime, timekeeper.clock); } -- cgit v1.2.3 From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) has a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system, change the read_persistent_clock function to return a struct timespec.
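Every architecture is converted with the same mechanical pattern; a minimal sketch, assuming a hypothetical per-arch RTC accessor arch_rtc_read_seconds() in place of the various mc146818/xicor/TOD helpers:

void read_persistent_clock(struct timespec *ts)
{
	/* The old interface returned whole seconds; that value becomes tv_sec. */
	ts->tv_sec = arch_rtc_read_seconds();
	/*
	 * Clocks with sub-second resolution (e.g. the s390 TOD clock)
	 * can now report it here instead of truncating to seconds.
	 */
	ts->tv_nsec = 0;
}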
Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/m68knommu/kernel/time.c | 5 ++-- arch/mips/dec/time.c | 5 ++-- arch/mips/lasat/ds1603.c | 5 ++-- arch/mips/lasat/sysctl.c | 8 ++++-- arch/mips/lemote/lm2e/setup.c | 5 ++-- arch/mips/mti-malta/malta-time.c | 5 ++-- arch/mips/pmc-sierra/yosemite/setup.c | 5 ++-- arch/mips/sibyte/swarm/setup.c | 15 +++++++--- arch/mips/sni/time.c | 5 ++-- arch/powerpc/kernel/time.c | 7 +++-- arch/s390/kernel/time.c | 22 +++------------ arch/sh/kernel/time.c | 6 ++-- arch/x86/kernel/rtc.c | 5 ++-- arch/xtensa/kernel/time.c | 5 ++-- include/linux/time.h | 2 +- kernel/time/timekeeping.c | 52 +++++++++++++++++++---------------- 16 files changed, 83 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f72211..68432248515c 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -72,9 +72,10 @@ static unsigned long read_rtc_mmss(void) return mktime(year, mon, day, hour, min, sec);; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return read_rtc_mmss(); + ts->tv_sec = read_rtc_mmss(); + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index 463136e6685a..02f505f23c32 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -18,7 +18,7 @@ #include #include -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, mon, day, hour, min, sec, real_year; unsigned long flags; @@ -53,7 +53,8 @@ unsigned long read_persistent_clock(void) year += real_year - 72 + 2000; - return mktime(year, mon, day, hour, min, sec); + ts->tv_sec = mktime(year, mon, day, hour, min, sec); + ts->tv_nsec = 0; } /* diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c index 52cb1436a12a..c6fd96ff118d 100644 --- a/arch/mips/lasat/ds1603.c +++ b/arch/mips/lasat/ds1603.c @@ -135,7 +135,7 @@ static void rtc_end_op(void) lasat_ndelay(1000); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long word; unsigned long flags; @@ -147,7 +147,8 @@ unsigned long read_persistent_clock(void) rtc_end_op(); spin_unlock_irqrestore(&rtc_lock, flags); - return word; + ts->tv_sec = word; + ts->tv_nsec = 0; } int rtc_mips_set_mmss(unsigned long time) diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 8f88886feb12..3f04d4c406b7 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -92,10 +92,12 @@ static int rtctmp; int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, loff_t *ppos) { + struct timespec ts; int r; if (!write) { - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; /* check for time < 0 and set to 0 */ if (rtctmp < 0) rtctmp = 0; @@ -134,9 +136,11 @@ int sysctl_lasat_rtc(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { + struct timespec ts; int r; - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; if (rtctmp < 0) rtctmp = 0; r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index ebd6ceaef2fd..24b355df6127 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ 
b/arch/mips/lemote/lm2e/setup.c @@ -54,9 +54,10 @@ void __init plat_time_init(void) mips_hpt_frequency = cpu_clock_freq / 2; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } void (*__wbflush)(void); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 0b97d47691fc..3c6f190aa61c 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -100,9 +100,10 @@ static unsigned int __init estimate_cpu_frequency(void) return count; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } static void __init plat_perf_setup(void) diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 2d3c0dca275d..3498ac9c35af 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -70,7 +70,7 @@ void __init bus_error_init(void) } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, month, day, hour, min, sec; unsigned long flags; @@ -92,7 +92,8 @@ unsigned long read_persistent_clock(void) m48t37_base->control = 0x00; spin_unlock_irqrestore(&rtc_lock, flags); - return mktime(year, month, day, hour, min, sec); + ts->tv_sec = mktime(year, month, day, hour, min, sec); + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long tim) diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 672e45d495a9..623ffc933c4c 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -87,19 +87,26 @@ enum swarm_rtc_type { enum swarm_rtc_type swarm_rtc_type; -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { + unsigned long sec; + switch (swarm_rtc_type) { case RTC_XICOR: - return xicor_get_time(); + sec = xicor_get_time(); + break; case RTC_M4LT81: - return m41t81_get_time(); + sec = m41t81_get_time(); + break; case RTC_NONE: default: - return mktime(2000, 1, 1, 0, 0, 0); + sec = mktime(2000, 1, 1, 0, 0, 0); + break; } + ts->tv_sec = sec; + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long sec) diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 0d9ec1a5c24a..62df6a598e0a 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -182,7 +182,8 @@ void __init plat_time_init(void) setup_pit_timer(); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return -1; + ts->tv_sec = -1; + ts->tv_nsec = 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..ad63f30fe3da 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -769,7 +769,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; @@ -787,8 +787,9 @@ unsigned long read_persistent_clock(void) if (!ppc_md.get_rtc_time) return; ppc_md.get_rtc_time(&tm); - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_nsec = 0; } /* clocksource code */ diff --git a/arch/s390/kernel/time.c
b/arch/s390/kernel/time.c index e76c2e7a8b9a..a94ec48587b4 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -182,12 +182,9 @@ static void timing_alert_interrupt(__u16 code) static void etr_reset(void); static void stp_reset(void); -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec ts; - - tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); - return ts.tv_sec; + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) @@ -248,7 +245,6 @@ void __init time_init(void) { struct timespec ts; unsigned long flags; - cycle_t now; /* Reset time synchronization interfaces. */ etr_reset(); stp_reset(); @@ -266,20 +262,10 @@ void __init time_init(void) panic("Could not register TOD clock source"); /* - * The TOD clock is an accurate clock. The xtime should be - * initialized in a way that the difference between TOD and - * xtime is reasonably small. Too bad that timekeeping_init - * sets xtime.tv_nsec to zero. In addition the clock source - * change from the jiffies clock source to the TOD clock - * source add another error of up to 1/HZ second. The same - * function sets wall_to_monotonic to a value that is too - * small for /proc/uptime to be accurate. - * Reset xtime and wall_to_monotonic to sane values. + * Reset wall_to_monotonic to the initial timestamp created + * in head.S to get a precise value in /proc/uptime. */ write_seqlock_irqsave(&xtime_lock, flags); - now = get_clock(); - tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); - clocksource_tod.cycle_last = now; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 9b352a1e3fb4..3f4706aa975e 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -39,11 +39,9 @@ void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time; int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; #ifdef CONFIG_GENERIC_CMOS_UPDATE -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec tv; - rtc_sh_get_time(&tv); - return tv.tv_sec; + rtc_sh_get_time(ts); } int update_persistent_clock(struct timespec now) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e72..bf67dcb4a44c 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime) } /* not static: needed by APM */ -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long retval, flags; @@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void) retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); - return retval; + ts->tv_sec = retval; + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 8848120d291b..19085ff0484a 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -59,9 +59,8 @@ static struct irqaction timer_irqaction = { void __init time_init(void) { - xtime.tv_nsec = 0; - xtime.tv_sec = read_persistent_clock(); - + /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init.
*/ + read_persistent_clock(&xtime); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); diff --git a/include/linux/time.h b/include/linux/time.h index e7c844558884..53a3216f0d1b 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -101,7 +101,7 @@ extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; -extern unsigned long read_persistent_clock(void); +extern void read_persistent_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 41579e7fcf9d..f1a21ce491e6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -154,7 +154,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); */ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); -static unsigned long total_sleep_time; /* seconds */ +static struct timespec total_sleep_time; /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. @@ -487,17 +487,18 @@ int timekeeping_valid_for_hres(void) } /** - * read_persistent_clock - Return time in seconds from the persistent clock. + * read_persistent_clock - Return time from the persistent clock. * * Weak dummy function for arches that do not yet support it. - * Returns seconds from epoch using the battery backed persistent clock. - * Returns zero if unsupported. + * Reads the time from the battery backed persistent clock. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. * * XXX - Do be sure to remove it once all arches implement it. */ -unsigned long __attribute__((weak)) read_persistent_clock(void) +void __attribute__((weak)) read_persistent_clock(struct timespec *ts) { - return 0; + ts->tv_sec = 0; + ts->tv_nsec = 0; } /* @@ -507,7 +508,9 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - unsigned long sec = read_persistent_clock(); + struct timespec now; + + read_persistent_clock(&now); write_seqlock_irqsave(&xtime_lock, flags); @@ -518,19 +521,20 @@ void __init timekeeping_init(void) clock->enable(clock); timekeeper_setup_internals(clock); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; + xtime.tv_sec = now.tv_sec; + xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); - total_sleep_time = 0; + total_sleep_time.tv_sec = 0; + total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); } /* time in seconds when suspend began */ -static unsigned long timekeeping_suspend_time; +static struct timespec timekeeping_suspend_time; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. 
@@ -543,18 +547,19 @@ static unsigned long timekeeping_suspend_time; static int timekeeping_resume(struct sys_device *dev) { unsigned long flags; - unsigned long now = read_persistent_clock(); + struct timespec ts; + + read_persistent_clock(&ts); clocksource_resume(); write_seqlock_irqsave(&xtime_lock, flags); - if (now && (now > timekeeping_suspend_time)) { - unsigned long sleep_length = now - timekeeping_suspend_time; - - xtime.tv_sec += sleep_length; - wall_to_monotonic.tv_sec -= sleep_length; - total_sleep_time += sleep_length; + if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { + ts = timespec_sub(ts, timekeeping_suspend_time); + xtime = timespec_add_safe(xtime, ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); + total_sleep_time = timespec_add_safe(total_sleep_time, ts); } update_xtime_cache(0); /* re-base the last cycle value */ @@ -577,7 +582,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; - timekeeping_suspend_time = read_persistent_clock(); + read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&xtime_lock, flags); timekeeping_forward_now(); @@ -801,9 +806,10 @@ void update_wall_time(void) */ void getboottime(struct timespec *ts) { - set_normalized_timespec(ts, - - (wall_to_monotonic.tv_sec + total_sleep_time), - - wall_to_monotonic.tv_nsec); + struct timespec boottime; + + boottime = timespec_add_safe(wall_to_monotonic, total_sleep_time); + set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } /** @@ -812,7 +818,7 @@ void getboottime(struct timespec *ts) */ void monotonic_to_bootbased(struct timespec *ts) { - ts->tv_sec += total_sleep_time; + *ts = timespec_add_safe(*ts, total_sleep_time); } unsigned long get_seconds(void) -- cgit v1.2.3 From 23970e389e9cee43c4b41023935e1417271708b2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:32 +0200 Subject: timekeeping: Introduce read_boot_clock Add the new function read_boot_clock to get the exact time the system has been started. For architectures without support for exact boot time a new weak function is added that returns 0. Use the exact boot time to initialize wall_to_monotonic, or xtime if the read_boot_clock returned 0. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.296703241@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 17 +++++------------ include/linux/time.h | 1 + kernel/time/timekeeping.c | 24 ++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a94ec48587b4..6bff1a1d9060 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -187,6 +187,11 @@ void read_persistent_clock(struct timespec *ts) tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } +void read_boot_clock(struct timespec *ts) +{ + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); +} + static cycle_t read_tod_clock(struct clocksource *cs) { return get_clock(); @@ -243,9 +248,6 @@ void update_vsyscall_tz(void) */ void __init time_init(void) { - struct timespec ts; - unsigned long flags; - /* Reset time synchronization interfaces. 
*/ etr_reset(); stp_reset(); @@ -261,15 +263,6 @@ void __init time_init(void) if (clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); - /* - * Reset wall_to_monotonic to the initial timestamp created - * in head.S to get a precise value in /proc/uptime. - */ - write_seqlock_irqsave(&xtime_lock, flags); - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); - set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); - /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); diff --git a/include/linux/time.h b/include/linux/time.h index 53a3216f0d1b..f505988398e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -102,6 +102,7 @@ extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; extern void read_persistent_clock(struct timespec *ts); +extern void read_boot_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f1a21ce491e6..15e06defca55 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -501,6 +501,21 @@ void __attribute__((weak)) read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +/** + * read_boot_clock - Return time of the system start. + * + * Weak dummy function for arches that do not yet support it. + * Function to read the exact time the system has been started. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. + * + * XXX - Do be sure to remove it once all arches implement it. + */ +void __attribute__((weak)) read_boot_clock(struct timespec *ts) +{ + ts->tv_sec = 0; + ts->tv_nsec = 0; +} + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -508,9 +523,10 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - struct timespec now; + struct timespec now, boot; read_persistent_clock(&now); + read_boot_clock(&boot); write_seqlock_irqsave(&xtime_lock, flags); @@ -525,8 +541,12 @@ void __init timekeeping_init(void) xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; + if (boot.tv_sec == 0 && boot.tv_nsec == 0) { + boot.tv_sec = xtime.tv_sec; + boot.tv_nsec = xtime.tv_nsec; + } set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + -boot.tv_sec, -boot.tv_nsec); update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; -- cgit v1.2.3 From 0ccff1a49def92d6b838a6da166c89004b3a4d0c Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 17 Aug 2009 22:38:04 -0400 Subject: jbd2: bitfields should be unsigned This fixes sparse noise: error: dubious one-bit signed bitfield Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" Cc: Jan Kara --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d97eb652d6ca..52695d3dfd0b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -652,7 +652,7 @@ struct transaction_s * This transaction is being forced and some process is * waiting for it to finish. 
*/ - int t_synchronous_commit:1; + unsigned int t_synchronous_commit:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From 776f3360de6ed246e973577828f725681120fd7a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 19 Aug 2009 15:56:37 +1000 Subject: drm: fixup includes in encoder slave header files. Signed-off-by: Dave Airlie --- include/drm/drm_encoder_slave.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h index 821ec40c17d8..e5e5c94ca92c 100644 --- a/include/drm/drm_encoder_slave.h +++ b/include/drm/drm_encoder_slave.h @@ -27,8 +27,8 @@ #ifndef __DRM_ENCODER_SLAVE_H__ #define __DRM_ENCODER_SLAVE_H__ -#include -#include +#include "drmP.h" +#include "drm_crtc.h" /** * struct drm_encoder_slave_funcs - Entry points exposed by a slave encoder driver -- cgit v1.2.3 From 53bd83899f5ba6b0da8f5ef976129273854a72d4 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 1 Jul 2009 10:04:40 -0700 Subject: drm: clarify scaling property names Now that we're using the scaling property in the Intel driver I noticed that the names were a bit confusing. I've corrected them according to our discussion on IRC and the mailing list, though I've left out potential new additions for a new scaling property with an integer (or two) for the scaling factor. None of the drivers implement that today, but if someone wants to do it, I think it could be done with the addition of a single new type and a new property to describe the scaling factor in the X and Y directions. Signed-off-by: Jesse Barnes Acked-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 8 ++++---- drivers/gpu/drm/i915/intel_lvds.c | 14 +++----------- include/drm/drm_mode.h | 9 +++++---- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 362a538cdedc..39a6bc69d223 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -68,10 +68,10 @@ DRM_ENUM_NAME_FN(drm_get_dpms_name, drm_dpms_enum_list) */ static struct drm_prop_enum_list drm_scaling_mode_enum_list[] = { - { DRM_MODE_SCALE_NON_GPU, "Non-GPU" }, - { DRM_MODE_SCALE_FULLSCREEN, "Fullscreen" }, - { DRM_MODE_SCALE_NO_SCALE, "No scale" }, - { DRM_MODE_SCALE_ASPECT, "Aspect" }, + { DRM_MODE_SCALE_NONE, "None" }, + { DRM_MODE_SCALE_FULLSCREEN, "Full" }, + { DRM_MODE_SCALE_CENTER, "Center" }, + { DRM_MODE_SCALE_ASPECT, "Full aspect" }, }; static struct drm_prop_enum_list drm_dithering_mode_enum_list[] = diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b59c65d19d81..5df486fbe056 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -38,14 +38,6 @@ #include "i915_drv.h" #include -/* - * the following four scaling options are defined. - * #define DRM_MODE_SCALE_NON_GPU 0 - * #define DRM_MODE_SCALE_FULLSCREEN 1 - * #define DRM_MODE_SCALE_NO_SCALE 2 - * #define DRM_MODE_SCALE_ASPECT 3 - */ - /* Private structure for the integrated LVDS support */ struct intel_lvds_priv { int fitting_mode; @@ -334,7 +326,7 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder, I915_WRITE(BCLRPAT_B, 0); switch (lvds_priv->fitting_mode) { - case DRM_MODE_SCALE_NO_SCALE: + case DRM_MODE_SCALE_CENTER: /* * For centered modes, we have to calculate border widths & * heights and modify the values programmed into the CRTC. 
@@ -670,8 +662,8 @@ static int intel_lvds_set_property(struct drm_connector *connector, connector->encoder) { struct drm_crtc *crtc = connector->encoder->crtc; struct intel_lvds_priv *lvds_priv = intel_output->dev_priv; - if (value == DRM_MODE_SCALE_NON_GPU) { - DRM_DEBUG_KMS("non_GPU property is unsupported\n"); + if (value == DRM_MODE_SCALE_NONE) { + DRM_DEBUG_KMS("no scaling not supported\n"); return 0; } if (lvds_priv->fitting_mode == value) { diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 616aeb42b773..1f908416aedb 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -68,10 +68,11 @@ #define DRM_MODE_DPMS_OFF 3 /* Scaling mode options */ -#define DRM_MODE_SCALE_NON_GPU 0 -#define DRM_MODE_SCALE_FULLSCREEN 1 -#define DRM_MODE_SCALE_NO_SCALE 2 -#define DRM_MODE_SCALE_ASPECT 3 +#define DRM_MODE_SCALE_NONE 0 /* Unmodified timing (display or + software can still scale) */ +#define DRM_MODE_SCALE_FULLSCREEN 1 /* Full screen, ignore aspect */ +#define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ +#define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 -- cgit v1.2.3 From 949ef70e2d1a5c12178875f513df34fc85d91a38 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 17 Aug 2009 19:49:19 +0300 Subject: drm/kms: no need to return void value (encoder) Cc: Francisco Jerez Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- include/drm/drm_encoder_slave.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h index e5e5c94ca92c..2f65633d28a7 100644 --- a/include/drm/drm_encoder_slave.h +++ b/include/drm/drm_encoder_slave.h @@ -154,7 +154,7 @@ static inline int drm_i2c_encoder_register(struct module *owner, */ static inline void drm_i2c_encoder_unregister(struct drm_i2c_encoder_driver *driver) { - return i2c_del_driver(&driver->i2c_driver); + i2c_del_driver(&driver->i2c_driver); } void drm_i2c_encoder_destroy(struct drm_encoder *encoder); -- cgit v1.2.3 From a0724fcf829e5afb66159ef68cb16a805ea11b42 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 17 Aug 2009 01:18:38 +0300 Subject: drm/ttm: optimize bo_kmap_type values A micro-optimization on the function ttm_kmap_obj_virtual(). By defining the values of enum ttm_bo_kmap_obj::bo_kmap_type to have a bit indicating iomem, size of the function ttm_kmap_obj_virtual() will be reduced by 16 bytes on x86_64 (gcc 4.1.2). ttm_kmap_obj_virtual() may be heavily used, when buffer objects are accessed via wrappers, that work for both kinds of memory addresses: iomem cookies and kernel virtual. Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_bo_api.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index cd22ab4b495c..99dc521aa1a9 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -245,14 +245,15 @@ struct ttm_buffer_object { * premapped region. 
*/ +#define TTM_BO_MAP_IOMEM_MASK 0x80 struct ttm_bo_kmap_obj { void *virtual; struct page *page; enum { - ttm_bo_map_iomap, - ttm_bo_map_vmap, - ttm_bo_map_kmap, - ttm_bo_map_premapped, + ttm_bo_map_iomap = 1 | TTM_BO_MAP_IOMEM_MASK, + ttm_bo_map_vmap = 2, + ttm_bo_map_kmap = 3, + ttm_bo_map_premapped = 4 | TTM_BO_MAP_IOMEM_MASK, } bo_kmap_type; }; @@ -522,8 +523,7 @@ extern int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type); static inline void *ttm_kmap_obj_virtual(struct ttm_bo_kmap_obj *map, bool *is_iomem) { - *is_iomem = (map->bo_kmap_type == ttm_bo_map_iomap || - map->bo_kmap_type == ttm_bo_map_premapped); + *is_iomem = !!(map->bo_kmap_type & TTM_BO_MAP_IOMEM_MASK); return map->virtual; } -- cgit v1.2.3 From 327c225bd548bf7871f116a0baa5ebdac884e452 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 17 Aug 2009 16:28:37 +0200 Subject: drm: Enable drm drivers to add drm sysfs devices. Export utility functions for drivers to add specialized devices in the sysfs drm class subdirectory. Initially this will be needed for TTM to add a virtual device that handles power management. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_sysfs.c | 25 +++++++++++++++++++++++++ include/drm/drm_sysfs.h | 12 ++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 include/drm/drm_sysfs.h (limited to 'include') diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index adc179459c25..de154556c405 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -16,6 +16,7 @@ #include #include +#include "drm_sysfs.h" #include "drm_core.h" #include "drmP.h" @@ -515,3 +516,27 @@ void drm_sysfs_device_remove(struct drm_minor *minor) { device_unregister(&minor->kdev); } + + +/** + * drm_class_device_register - Register a struct device in the drm class. + * + * @dev: pointer to struct device to register. + * + * @dev should have all relevant members pre-filled with the exception + * of the class member. In particular, the device_type member must + * be set. + */ + +int drm_class_device_register(struct device *dev) +{ + dev->class = drm_class; + return device_register(dev); +} +EXPORT_SYMBOL_GPL(drm_class_device_register); + +void drm_class_device_unregister(struct device *dev) +{ + return device_unregister(dev); +} +EXPORT_SYMBOL_GPL(drm_class_device_unregister); diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h new file mode 100644 index 000000000000..1d8e033fde67 --- /dev/null +++ b/include/drm/drm_sysfs.h @@ -0,0 +1,12 @@ +#ifndef _DRM_SYSFS_H_ +#define _DRM_SYSFS_H_ + +/** + * This minimalistic include file is intended for users (read TTM) that + * don't want to include the full drmP.h file. + */ + +extern int drm_class_device_register(struct device *dev); +extern void drm_class_device_unregister(struct device *dev); + +#endif -- cgit v1.2.3 From 5fd9cbad3a4ae82c83c55b9c621d156c326724ef Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 17 Aug 2009 16:28:39 +0200 Subject: drm/ttm: Memory accounting rework. Use inclusive zones to simplify accounting and its sysfs representation. Use DMA32 accounting where applicable. Add a sysfs interface to make the heuristically determined limits readable and configurable.
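With inclusive zones, a normal kernel allocation is charged to every zone at once, so detecting memory pressure reduces to one comparison per zone. A simplified sketch of that check, using illustrative stand-ins for the ttm_mem_zone machinery in the patch below:

struct example_zone {
	uint64_t used_mem;	/* bytes currently charged to this zone */
	uint64_t max_mem;	/* heuristic limit, now tunable via sysfs */
};

/* Inclusive accounting: exceeding any single zone signals pressure. */
static bool example_zones_over_limit(struct example_zone **zones,
				     unsigned int nr_zones, uint64_t extra)
{
	unsigned int i;

	for (i = 0; i < nr_zones; ++i)
		if (zones[i]->used_mem + extra > zones[i]->max_mem)
			return true;
	return false;
}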
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 6 +- drivers/gpu/drm/ttm/ttm_global.c | 4 +- drivers/gpu/drm/ttm/ttm_memory.c | 488 +++++++++++++++++++++++++++++++++------ drivers/gpu/drm/ttm/ttm_tt.c | 29 +-- include/drm/ttm/ttm_memory.h | 43 ++-- include/drm/ttm/ttm_module.h | 2 + 6 files changed, 453 insertions(+), 119 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c1c407f7cca3..f16909ceec93 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -70,7 +70,7 @@ static void ttm_bo_release_list(struct kref *list_kref) if (bo->destroy) bo->destroy(bo); else { - ttm_mem_global_free(bdev->mem_glob, bo->acc_size, false); + ttm_mem_global_free(bdev->mem_glob, bo->acc_size); kfree(bo); } } @@ -1065,14 +1065,14 @@ int ttm_buffer_object_create(struct ttm_bo_device *bdev, size_t acc_size = ttm_bo_size(bdev, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); - ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false, false); + ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); if (unlikely(ret != 0)) return ret; bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (unlikely(bo == NULL)) { - ttm_mem_global_free(mem_glob, acc_size, false); + ttm_mem_global_free(mem_glob, acc_size); return -ENOMEM; } diff --git a/drivers/gpu/drm/ttm/ttm_global.c b/drivers/gpu/drm/ttm/ttm_global.c index 0b14eb1972b8..541744d00d3e 100644 --- a/drivers/gpu/drm/ttm/ttm_global.c +++ b/drivers/gpu/drm/ttm/ttm_global.c @@ -71,7 +71,7 @@ int ttm_global_item_ref(struct ttm_global_reference *ref) mutex_lock(&item->mutex); if (item->refcount == 0) { - item->object = kmalloc(ref->size, GFP_KERNEL); + item->object = kzalloc(ref->size, GFP_KERNEL); if (unlikely(item->object == NULL)) { ret = -ENOMEM; goto out_err; @@ -89,7 +89,6 @@ int ttm_global_item_ref(struct ttm_global_reference *ref) mutex_unlock(&item->mutex); return 0; out_err: - kfree(item->object); mutex_unlock(&item->mutex); item->object = NULL; return ret; @@ -105,7 +104,6 @@ void ttm_global_item_unref(struct ttm_global_reference *ref) BUG_ON(ref->object != item->object); if (--item->refcount == 0) { ref->release(ref); - kfree(item->object); item->object = NULL; } mutex_unlock(&item->mutex); diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index 87323d4ff68d..62fb5cf0899e 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -26,15 +26,180 @@ **************************************************************************/ #include "ttm/ttm_memory.h" +#include "ttm/ttm_module.h" #include #include #include #include #include -#define TTM_PFX "[TTM] " #define TTM_MEMORY_ALLOC_RETRIES 4 +struct ttm_mem_zone { + struct kobject kobj; + struct ttm_mem_global *glob; + const char *name; + uint64_t zone_mem; + uint64_t emer_mem; + uint64_t max_mem; + uint64_t swap_limit; + uint64_t used_mem; +}; + +static struct attribute ttm_mem_sys = { + .name = "zone_memory", + .mode = S_IRUGO +}; +static struct attribute ttm_mem_emer = { + .name = "emergency_memory", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_max = { + .name = "available_memory", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_swap = { + .name = "swap_limit", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_used = { + .name = "used_memory", + .mode = S_IRUGO +}; + +static void ttm_mem_zone_kobj_release(struct kobject *kobj) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct 
ttm_mem_zone, kobj); + + printk(KERN_INFO TTM_PFX + "Zone %7s: Used memory at exit: %llu kiB.\n", + zone->name, (unsigned long long) zone->used_mem >> 10); + kfree(zone); +} + +static ssize_t ttm_mem_zone_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct ttm_mem_zone, kobj); + uint64_t val = 0; + + spin_lock(&zone->glob->lock); + if (attr == &ttm_mem_sys) + val = zone->zone_mem; + else if (attr == &ttm_mem_emer) + val = zone->emer_mem; + else if (attr == &ttm_mem_max) + val = zone->max_mem; + else if (attr == &ttm_mem_swap) + val = zone->swap_limit; + else if (attr == &ttm_mem_used) + val = zone->used_mem; + spin_unlock(&zone->glob->lock); + + return snprintf(buffer, PAGE_SIZE, "%llu\n", + (unsigned long long) val >> 10); +} + +static void ttm_check_swapping(struct ttm_mem_global *glob); + +static ssize_t ttm_mem_zone_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t size) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct ttm_mem_zone, kobj); + int chars; + unsigned long val; + uint64_t val64; + + chars = sscanf(buffer, "%lu", &val); + if (chars == 0) + return size; + + val64 = val; + val64 <<= 10; + + spin_lock(&zone->glob->lock); + if (val64 > zone->zone_mem) + val64 = zone->zone_mem; + if (attr == &ttm_mem_emer) { + zone->emer_mem = val64; + if (zone->max_mem > val64) + zone->max_mem = val64; + } else if (attr == &ttm_mem_max) { + zone->max_mem = val64; + if (zone->emer_mem < val64) + zone->emer_mem = val64; + } else if (attr == &ttm_mem_swap) + zone->swap_limit = val64; + spin_unlock(&zone->glob->lock); + + ttm_check_swapping(zone->glob); + + return size; +} + +static struct attribute *ttm_mem_zone_attrs[] = { + &ttm_mem_sys, + &ttm_mem_emer, + &ttm_mem_max, + &ttm_mem_swap, + &ttm_mem_used, + NULL +}; + +static struct sysfs_ops ttm_mem_zone_ops = { + .show = &ttm_mem_zone_show, + .store = &ttm_mem_zone_store +}; + +static struct kobj_type ttm_mem_zone_kobj_type = { + .release = &ttm_mem_zone_kobj_release, + .sysfs_ops = &ttm_mem_zone_ops, + .default_attrs = ttm_mem_zone_attrs, +}; + +static void ttm_mem_global_kobj_release(struct kobject *kobj) +{ + struct ttm_mem_global *glob = + container_of(kobj, struct ttm_mem_global, kobj); + + kfree(glob); +} + +static struct kobj_type ttm_mem_glob_kobj_type = { + .release = &ttm_mem_global_kobj_release, +}; + +static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob, + bool from_wq, uint64_t extra) +{ + unsigned int i; + struct ttm_mem_zone *zone; + uint64_t target; + + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + + if (from_wq) + target = zone->swap_limit; + else if (capable(CAP_SYS_ADMIN)) + target = zone->emer_mem; + else + target = zone->max_mem; + + target = (extra > target) ? 0ULL : target; + + if (zone->used_mem > target) + return true; + } + return false; +} + /** * At this point we only support a single shrink callback. * Extend this if needed, perhaps using a linked list of callbacks. @@ -42,34 +207,17 @@ * many threads may try to swap out at any given time. 
*/ -static void ttm_shrink(struct ttm_mem_global *glob, bool from_workqueue, +static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, uint64_t extra) { int ret; struct ttm_mem_shrink *shrink; - uint64_t target; - uint64_t total_target; spin_lock(&glob->lock); if (glob->shrink == NULL) goto out; - if (from_workqueue) { - target = glob->swap_limit; - total_target = glob->total_memory_swap_limit; - } else if (capable(CAP_SYS_ADMIN)) { - total_target = glob->emer_total_memory; - target = glob->emer_memory; - } else { - total_target = glob->max_total_memory; - target = glob->max_memory; - } - - total_target = (extra >= total_target) ? 0 : total_target - extra; - target = (extra >= target) ? 0 : target - extra; - - while (glob->used_memory > target || - glob->used_total_memory > total_target) { + while (ttm_zones_above_swap_target(glob, from_wq, extra)) { shrink = glob->shrink; spin_unlock(&glob->lock); ret = shrink->do_shrink(shrink); @@ -81,6 +229,8 @@ out: spin_unlock(&glob->lock); } + + static void ttm_shrink_work(struct work_struct *work) { struct ttm_mem_global *glob = @@ -89,63 +239,178 @@ static void ttm_shrink_work(struct work_struct *work) ttm_shrink(glob, true, 0ULL); } +static int ttm_mem_init_kernel_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + mem = si->totalram - si->totalhigh; + mem *= si->mem_unit; + + zone->name = "kernel"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_kernel = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} + +#ifdef CONFIG_HIGHMEM +static int ttm_mem_init_highmem_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + if (si->totalhigh == 0) + return 0; + + mem = si->totalram; + mem *= si->mem_unit; + + zone->name = "highmem"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_highmem = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} +#else +static int ttm_mem_init_dma32_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + mem = si->totalram; + mem *= si->mem_unit; + + /** + * No special dma32 zone needed. + */ + + if (mem <= ((uint64_t) 1ULL << 32)) + return 0; + + /* + * Limit max dma32 memory to 4GB for now + * until we can figure out how big this + * zone really is. 
+ */ + + mem = ((uint64_t) 1ULL << 32); + zone->name = "dma32"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_dma32 = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} +#endif + int ttm_mem_global_init(struct ttm_mem_global *glob) { struct sysinfo si; - uint64_t mem; + int ret; + int i; + struct ttm_mem_zone *zone; spin_lock_init(&glob->lock); glob->swap_queue = create_singlethread_workqueue("ttm_swap"); INIT_WORK(&glob->work, ttm_shrink_work); init_waitqueue_head(&glob->queue); + kobject_init(&glob->kobj, &ttm_mem_glob_kobj_type); + ret = kobject_add(&glob->kobj, + ttm_get_kobj(), + "memory_accounting"); + if (unlikely(ret != 0)) + goto out_no_zone; si_meminfo(&si); - mem = si.totalram - si.totalhigh; - mem *= si.mem_unit; - - glob->max_memory = mem >> 1; - glob->emer_memory = (mem >> 1) + (mem >> 2); - glob->swap_limit = glob->max_memory - (mem >> 3); - glob->used_memory = 0; - glob->used_total_memory = 0; - glob->shrink = NULL; - - mem = si.totalram; - mem *= si.mem_unit; - - glob->max_total_memory = mem >> 1; - glob->emer_total_memory = (mem >> 1) + (mem >> 2); - - glob->total_memory_swap_limit = glob->max_total_memory - (mem >> 3); - - printk(KERN_INFO TTM_PFX "TTM available graphics memory: %llu MiB\n", - glob->max_total_memory >> 20); - printk(KERN_INFO TTM_PFX "TTM available object memory: %llu MiB\n", - glob->max_memory >> 20); - + ret = ttm_mem_init_kernel_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#ifdef CONFIG_HIGHMEM + ret = ttm_mem_init_highmem_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#else + ret = ttm_mem_init_dma32_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#endif + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + printk(KERN_INFO TTM_PFX + "Zone %7s: Available graphics memory: %llu kiB.\n", + zone->name, (unsigned long long) zone->max_mem >> 10); + } return 0; +out_no_zone: + ttm_mem_global_release(glob); + return ret; } EXPORT_SYMBOL(ttm_mem_global_init); void ttm_mem_global_release(struct ttm_mem_global *glob) { - printk(KERN_INFO TTM_PFX "Used total memory is %llu bytes.\n", - (unsigned long long)glob->used_total_memory); + unsigned int i; + struct ttm_mem_zone *zone; + flush_workqueue(glob->swap_queue); destroy_workqueue(glob->swap_queue); glob->swap_queue = NULL; + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + kobject_del(&zone->kobj); + kobject_put(&zone->kobj); + } + kobject_del(&glob->kobj); + kobject_put(&glob->kobj); } EXPORT_SYMBOL(ttm_mem_global_release); -static inline void ttm_check_swapping(struct ttm_mem_global *glob) +static void ttm_check_swapping(struct ttm_mem_global *glob) { - bool needs_swapping; + bool needs_swapping = false; + unsigned int i; + struct ttm_mem_zone *zone; spin_lock(&glob->lock); - needs_swapping = (glob->used_memory > glob->swap_limit || - glob->used_total_memory > - glob->total_memory_swap_limit); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (zone->used_mem > zone->swap_limit) { + needs_swapping = true; + break; + } + } + spin_unlock(&glob->lock); if (unlikely(needs_swapping)) @@ -153,44 +418,60 @@ static inline void ttm_check_swapping(struct ttm_mem_global *glob) } -void ttm_mem_global_free(struct ttm_mem_global *glob, - uint64_t amount, 
bool himem) +static void ttm_mem_global_free_zone(struct ttm_mem_global *glob, + struct ttm_mem_zone *single_zone, + uint64_t amount) { + unsigned int i; + struct ttm_mem_zone *zone; + spin_lock(&glob->lock); - glob->used_total_memory -= amount; - if (!himem) - glob->used_memory -= amount; - wake_up_all(&glob->queue); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; + zone->used_mem -= amount; + } spin_unlock(&glob->lock); } +void ttm_mem_global_free(struct ttm_mem_global *glob, + uint64_t amount) +{ + return ttm_mem_global_free_zone(glob, NULL, amount); +} + static int ttm_mem_global_reserve(struct ttm_mem_global *glob, - uint64_t amount, bool himem, bool reserve) + struct ttm_mem_zone *single_zone, + uint64_t amount, bool reserve) { uint64_t limit; - uint64_t lomem_limit; int ret = -ENOMEM; + unsigned int i; + struct ttm_mem_zone *zone; spin_lock(&glob->lock); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; - if (capable(CAP_SYS_ADMIN)) { - limit = glob->emer_total_memory; - lomem_limit = glob->emer_memory; - } else { - limit = glob->max_total_memory; - lomem_limit = glob->max_memory; - } + limit = (capable(CAP_SYS_ADMIN)) ? + zone->emer_mem : zone->max_mem; - if (unlikely(glob->used_total_memory + amount > limit)) - goto out_unlock; - if (unlikely(!himem && glob->used_memory + amount > lomem_limit)) - goto out_unlock; + if (zone->used_mem > limit) + goto out_unlock; + } if (reserve) { - glob->used_total_memory += amount; - if (!himem) - glob->used_memory += amount; + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; + zone->used_mem += amount; + } } + ret = 0; out_unlock: spin_unlock(&glob->lock); @@ -199,12 +480,17 @@ out_unlock: return ret; } -int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, - bool no_wait, bool interruptible, bool himem) + +static int ttm_mem_global_alloc_zone(struct ttm_mem_global *glob, + struct ttm_mem_zone *single_zone, + uint64_t memory, + bool no_wait, bool interruptible) { int count = TTM_MEMORY_ALLOC_RETRIES; - while (unlikely(ttm_mem_global_reserve(glob, memory, himem, true) + while (unlikely(ttm_mem_global_reserve(glob, + single_zone, + memory, true) != 0)) { if (no_wait) return -ENOMEM; @@ -216,6 +502,56 @@ int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, return 0; } +int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, + bool no_wait, bool interruptible) +{ + /** + * Normal allocations of kernel memory are registered in + * all zones. + */ + + return ttm_mem_global_alloc_zone(glob, NULL, memory, no_wait, + interruptible); +} + +int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, + struct page *page, + bool no_wait, bool interruptible) +{ + + struct ttm_mem_zone *zone = NULL; + + /** + * Page allocations may be registered in a single zone + * only if highmem or !dma32.
+ */ + +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page) && glob->zone_highmem != NULL) + zone = glob->zone_highmem; +#else + if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL) + zone = glob->zone_kernel; +#endif + return ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait, + interruptible); +} + +void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page) +{ + struct ttm_mem_zone *zone = NULL; + +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page) && glob->zone_highmem != NULL) + zone = glob->zone_highmem; +#else + if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL) + zone = glob->zone_kernel; +#endif + ttm_mem_global_free_zone(glob, zone, PAGE_SIZE); +} + + size_t ttm_round_pot(size_t size) { if ((size & (size - 1)) == 0) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 75dc8bd24592..4e1e2566d519 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -166,7 +166,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) set_page_dirty_lock(page); ttm->pages[i] = NULL; - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE, false); + ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE); put_page(page); } ttm->state = tt_unpopulated; @@ -187,21 +187,14 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index) if (!p) return NULL; - if (PageHighMem(p)) { - ret = - ttm_mem_global_alloc(mem_glob, PAGE_SIZE, - false, false, true); - if (unlikely(ret != 0)) - goto out_err; + ret = ttm_mem_global_alloc_page(mem_glob, p, false, false); + if (unlikely(ret != 0)) + goto out_err; + + if (PageHighMem(p)) ttm->pages[--ttm->first_himem_page] = p; - } else { - ret = - ttm_mem_global_alloc(mem_glob, PAGE_SIZE, - false, false, false); - if (unlikely(ret != 0)) - goto out_err; + else ttm->pages[++ttm->last_lomem_page] = p; - } } return p; out_err: @@ -355,8 +348,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) printk(KERN_ERR TTM_PFX "Erroneous page count. " "Leaking pages.\n"); - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE, - PageHighMem(cur_page)); + ttm_mem_global_free_page(ttm->bdev->mem_glob, + cur_page); __free_page(cur_page); } } @@ -411,7 +404,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, */ ret = ttm_mem_global_alloc(mem_glob, num_pages * PAGE_SIZE, - false, false, false); + false, false); if (unlikely(ret != 0)) return ret; @@ -422,7 +415,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, if (ret != num_pages && write) { ttm_tt_free_user_pages(ttm); - ttm_mem_global_free(mem_glob, num_pages * PAGE_SIZE, false); + ttm_mem_global_free(mem_glob, num_pages * PAGE_SIZE); return -ENOMEM; } diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h index d8b8f042c4f1..6983a7cf4da4 100644 --- a/include/drm/ttm/ttm_memory.h +++ b/include/drm/ttm/ttm_memory.h @@ -32,6 +32,7 @@ #include #include #include +#include /** * struct ttm_mem_shrink - callback to shrink TTM memory usage. @@ -60,34 +61,33 @@ struct ttm_mem_shrink { * @queue: Wait queue for processes suspended waiting for memory. * @lock: Lock to protect the @shrink - and the memory accounting members, * that is, essentially the whole structure with some exceptions. - * @emer_memory: Lowmem memory limit available for root. - * @max_memory: Lowmem memory limit available for non-root. - * @swap_limit: Lowmem memory limit where the shrink workqueue kicks in. - * @used_memory: Currently used lowmem memory. - * @used_total_memory: Currently used total (lowmem + highmem) memory. 
- * @total_memory_swap_limit: Total memory limit where the shrink workqueue - * kicks in. - * @max_total_memory: Total memory available to non-root processes. - * @emer_total_memory: Total memory available to root processes. + * @zones: Array of pointers to accounting zones. + * @num_zones: Number of populated entries in the @zones array. + * @zone_kernel: Pointer to the kernel zone. + * @zone_highmem: Pointer to the highmem zone if there is one. + * @zone_dma32: Pointer to the dma32 zone if there is one. * * Note that this structure is not per device. It should be global for all * graphics devices. */ +#define TTM_MEM_MAX_ZONES 2 +struct ttm_mem_zone; struct ttm_mem_global { + struct kobject kobj; struct ttm_mem_shrink *shrink; struct workqueue_struct *swap_queue; struct work_struct work; wait_queue_head_t queue; spinlock_t lock; - uint64_t emer_memory; - uint64_t max_memory; - uint64_t swap_limit; - uint64_t used_memory; - uint64_t used_total_memory; - uint64_t total_memory_swap_limit; - uint64_t max_total_memory; - uint64_t emer_total_memory; + struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES]; + unsigned int num_zones; + struct ttm_mem_zone *zone_kernel; +#ifdef CONFIG_HIGHMEM + struct ttm_mem_zone *zone_highmem; +#else + struct ttm_mem_zone *zone_dma32; +#endif }; /** @@ -146,8 +146,13 @@ static inline void ttm_mem_unregister_shrink(struct ttm_mem_global *glob, extern int ttm_mem_global_init(struct ttm_mem_global *glob); extern void ttm_mem_global_release(struct ttm_mem_global *glob); extern int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, - bool no_wait, bool interruptible, bool himem); + bool no_wait, bool interruptible); extern void ttm_mem_global_free(struct ttm_mem_global *glob, - uint64_t amount, bool himem); + uint64_t amount); +extern int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, + struct page *page, + bool no_wait, bool interruptible); +extern void ttm_mem_global_free_page(struct ttm_mem_global *glob, + struct page *page); extern size_t ttm_round_pot(size_t size); #endif diff --git a/include/drm/ttm/ttm_module.h b/include/drm/ttm/ttm_module.h index 889a4c7958ae..0a72ac7c7e58 100644 --- a/include/drm/ttm/ttm_module.h +++ b/include/drm/ttm/ttm_module.h @@ -32,6 +32,7 @@ #define _TTM_MODULE_H_ #include +struct kobject; #define TTM_PFX "[TTM]" @@ -54,5 +55,6 @@ extern void ttm_global_init(void); extern void ttm_global_release(void); extern int ttm_global_item_ref(struct ttm_global_reference *ref); extern void ttm_global_item_unref(struct ttm_global_reference *ref); +extern struct kobject *ttm_get_kobj(void); #endif /* _TTM_MODULE_H_ */ -- cgit v1.2.3 From a987fcaa805fcb24ba885c2e29fd4fdb6816f08f Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 18 Aug 2009 16:51:56 +0200 Subject: ttm: Make parts of a struct ttm_bo_device global. Common resources, like memory accounting and swap lists, should be global and not per device. Introduce a struct ttm_bo_global to accommodate this, and register it with sysfs. Add a small sysfs interface to return the number of active buffer objects.
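For driver writers, the intended wiring is the two-step sequence visible in the radeon hunks below: take a reference on the memory-accounting global, feed the resulting object into a struct ttm_bo_global_ref, and hand the bo global (rather than the mem global) to ttm_bo_device_init(). The following is a minimal sketch of that sequence, not part of the patch; struct mydrv_mman mirrors radeon_mman, and the mydrv_* names, including the mem-global init/release callbacks, are illustrative placeholders.

static int mydrv_ttm_global_init(struct mydrv_mman *mman)
{
	struct ttm_global_reference *global_ref;
	int r;

	/* Step 1: reference the shared memory accounting global. */
	global_ref = &mman->mem_global_ref;
	global_ref->global_type = TTM_GLOBAL_TTM_MEM;
	global_ref->size = sizeof(struct ttm_mem_global);
	global_ref->init = &mydrv_ttm_mem_global_init;
	global_ref->release = &mydrv_ttm_mem_global_release;
	r = ttm_global_item_ref(global_ref);
	if (r != 0)
		return r;

	/*
	 * Step 2: feed that object to the buffer-object global.
	 * (The radeon hunk below passes sizeof(struct ttm_mem_global)
	 * here; the bo global's own size is used in this sketch.)
	 */
	mman->bo_global_ref.mem_glob = mman->mem_global_ref.object;
	global_ref = &mman->bo_global_ref.ref;
	global_ref->global_type = TTM_GLOBAL_TTM_BO;
	global_ref->size = sizeof(struct ttm_bo_global);
	global_ref->init = &ttm_bo_global_init;
	global_ref->release = &ttm_bo_global_release;
	r = ttm_global_item_ref(global_ref);
	if (r != 0) {
		ttm_global_item_unref(&mman->mem_global_ref);
		return r;
	}
	mman->mem_global_referenced = true;

	/* Step 3: the bo device now binds to the bo global. */
	return ttm_bo_device_init(&mman->bdev,
				  mman->bo_global_ref.ref.object,
				  &mydrv_bo_driver, DRM_FILE_PAGE_OFFSET);
}

Teardown runs in the reverse order, unreferencing the bo global before the memory global, as radeon_ttm_global_fini() does below.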
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_object.h | 1 + drivers/gpu/drm/radeon/radeon_ttm.c | 33 +++- drivers/gpu/drm/ttm/ttm_bo.c | 292 ++++++++++++++++++++++----------- drivers/gpu/drm/ttm/ttm_bo_util.c | 4 +- drivers/gpu/drm/ttm/ttm_tt.c | 12 +- include/drm/ttm/ttm_bo_api.h | 1 + include/drm/ttm/ttm_bo_driver.h | 94 ++++++++--- 7 files changed, 296 insertions(+), 141 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 473e4775dc5a..10e8af6bb456 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -37,6 +37,7 @@ * TTM. */ struct radeon_mman { + struct ttm_bo_global_ref bo_global_ref; struct ttm_global_reference mem_global_ref; bool mem_global_referenced; struct ttm_bo_device bdev; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1227a97f5169..343b6d6b99c6 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -77,9 +77,25 @@ static int radeon_ttm_global_init(struct radeon_device *rdev) global_ref->release = &radeon_ttm_mem_global_release; r = ttm_global_item_ref(global_ref); if (r != 0) { - DRM_ERROR("Failed referencing a global TTM memory object.\n"); + DRM_ERROR("Failed setting up TTM memory accounting " + "subsystem.\n"); return r; } + + rdev->mman.bo_global_ref.mem_glob = + rdev->mman.mem_global_ref.object; + global_ref = &rdev->mman.bo_global_ref.ref; + global_ref->global_type = TTM_GLOBAL_TTM_BO; + global_ref->size = sizeof(struct ttm_mem_global); + global_ref->init = &ttm_bo_global_init; + global_ref->release = &ttm_bo_global_release; + r = ttm_global_item_ref(global_ref); + if (r != 0) { + DRM_ERROR("Failed setting up TTM BO subsystem.\n"); + ttm_global_item_unref(&rdev->mman.mem_global_ref); + return r; + } + rdev->mman.mem_global_referenced = true; return 0; } @@ -87,6 +103,7 @@ static int radeon_ttm_global_init(struct radeon_device *rdev) static void radeon_ttm_global_fini(struct radeon_device *rdev) { if (rdev->mman.mem_global_referenced) { + ttm_global_item_unref(&rdev->mman.bo_global_ref.ref); ttm_global_item_unref(&rdev->mman.mem_global_ref); rdev->mman.mem_global_referenced = false; } @@ -286,9 +303,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, r = ttm_bo_move_ttm(bo, true, no_wait, new_mem); out_cleanup: if (tmp_mem.mm_node) { - spin_lock(&rdev->mman.bdev.lru_lock); + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); drm_mm_put_block(tmp_mem.mm_node); - spin_unlock(&rdev->mman.bdev.lru_lock); + spin_unlock(&glob->lru_lock); return r; } return r; @@ -323,9 +342,11 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, } out_cleanup: if (tmp_mem.mm_node) { - spin_lock(&rdev->mman.bdev.lru_lock); + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); drm_mm_put_block(tmp_mem.mm_node); - spin_unlock(&rdev->mman.bdev.lru_lock); + spin_unlock(&glob->lru_lock); return r; } return r; @@ -441,7 +462,7 @@ int radeon_ttm_init(struct radeon_device *rdev) } /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&rdev->mman.bdev, - rdev->mman.mem_global_ref.object, + rdev->mman.bo_global_ref.ref.object, &radeon_bo_driver, DRM_FILE_PAGE_OFFSET); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
f16909ceec93..0d0b1b7afbcf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -45,6 +45,39 @@ static int ttm_bo_setup_vm(struct ttm_buffer_object *bo); static void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); static int ttm_bo_swapout(struct ttm_mem_shrink *shrink); +static void ttm_bo_global_kobj_release(struct kobject *kobj); + +static struct attribute ttm_bo_count = { + .name = "bo_count", + .mode = S_IRUGO +}; + +static ssize_t ttm_bo_global_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct ttm_bo_global *glob = + container_of(kobj, struct ttm_bo_global, kobj); + + return snprintf(buffer, PAGE_SIZE, "%lu\n", + (unsigned long) atomic_read(&glob->bo_count)); +} + +static struct attribute *ttm_bo_global_attrs[] = { + &ttm_bo_count, + NULL +}; + +static struct sysfs_ops ttm_bo_global_ops = { + .show = &ttm_bo_global_show +}; + +static struct kobj_type ttm_bo_glob_kobj_type = { + .release = &ttm_bo_global_kobj_release, + .sysfs_ops = &ttm_bo_global_ops, + .default_attrs = ttm_bo_global_attrs +}; + static inline uint32_t ttm_bo_type_flags(unsigned type) { @@ -67,10 +100,11 @@ static void ttm_bo_release_list(struct kref *list_kref) if (bo->ttm) ttm_tt_destroy(bo->ttm); + atomic_dec(&bo->glob->bo_count); if (bo->destroy) bo->destroy(bo); else { - ttm_mem_global_free(bdev->mem_glob, bo->acc_size); + ttm_mem_global_free(bdev->glob->mem_glob, bo->acc_size); kfree(bo); } } @@ -107,7 +141,7 @@ static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) kref_get(&bo->list_kref); if (bo->ttm != NULL) { - list_add_tail(&bo->swap, &bdev->swap_lru); + list_add_tail(&bo->swap, &bo->glob->swap_lru); kref_get(&bo->list_kref); } } @@ -142,7 +176,7 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int ret; while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) { @@ -154,9 +188,9 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, if (no_wait) return -EBUSY; - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ret = ttm_bo_wait_unreserved(bo, interruptible); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (unlikely(ret)) return ret; @@ -182,16 +216,16 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int put_count = 0; int ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ret = ttm_bo_reserve_locked(bo, interruptible, no_wait, use_sequence, sequence); if (likely(ret == 0)) put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&bo->list_kref, ttm_bo_ref_bug); @@ -201,13 +235,13 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, void ttm_bo_unreserve(struct ttm_buffer_object *bo) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ttm_bo_add_to_lru(bo); atomic_set(&bo->reserved, 0); wake_up_all(&bo->event_queue); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_bo_unreserve); @@ -218,6 +252,7 @@ EXPORT_SYMBOL(ttm_bo_unreserve); static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = 
bo->glob; int ret = 0; uint32_t page_flags = 0; @@ -230,14 +265,14 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; case ttm_bo_type_kernel: bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, - page_flags, bdev->dummy_read_page); + page_flags, glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) ret = -ENOMEM; break; case ttm_bo_type_user: bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, page_flags | TTM_PAGE_FLAG_USER, - bdev->dummy_read_page); + glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) ret = -ENOMEM; break; @@ -355,6 +390,7 @@ out_err: static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_bo_driver *driver = bdev->driver; int ret; @@ -366,7 +402,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) spin_unlock(&bo->lock); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ret = ttm_bo_reserve_locked(bo, false, false, false, 0); BUG_ON(ret); if (bo->ttm) @@ -381,7 +417,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) bo->mem.mm_node = NULL; } put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); atomic_set(&bo->reserved, 0); @@ -391,14 +427,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) return 0; } - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (list_empty(&bo->ddestroy)) { void *sync_obj = bo->sync_obj; void *sync_obj_arg = bo->sync_obj_arg; kref_get(&bo->list_kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); spin_unlock(&bo->lock); if (sync_obj) @@ -408,7 +444,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) ret = 0; } else { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); spin_unlock(&bo->lock); ret = -EBUSY; } @@ -423,11 +459,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_buffer_object *entry, *nentry; struct list_head *list, *next; int ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); list_for_each_safe(list, next, &bdev->ddestroy) { entry = list_entry(list, struct ttm_buffer_object, ddestroy); nentry = NULL; @@ -444,16 +481,16 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) } kref_get(&entry->list_kref); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ret = ttm_bo_cleanup_refs(entry, remove_all); kref_put(&entry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (nentry) { bool next_onlist = !list_empty(next); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); kref_put(&nentry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); /* * Someone might have raced us and removed the * next entry from the list. 
We don't bother restarting @@ -467,7 +504,7 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) break; } ret = !list_empty(&bdev->ddestroy); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return ret; } @@ -517,6 +554,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, unsigned mem_type, { int ret = 0; struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_mem_reg evict_mem; uint32_t proposed_placement; @@ -565,12 +603,12 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, unsigned mem_type, goto out; } - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (evict_mem.mm_node) { drm_mm_put_block(evict_mem.mm_node); evict_mem.mm_node = NULL; } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); bo->evicted = true; out: return ret; @@ -585,6 +623,7 @@ static int ttm_bo_mem_force_space(struct ttm_bo_device *bdev, uint32_t mem_type, bool interruptible, bool no_wait) { + struct ttm_bo_global *glob = bdev->glob; struct drm_mm_node *node; struct ttm_buffer_object *entry; struct ttm_mem_type_manager *man = &bdev->man[mem_type]; @@ -598,7 +637,7 @@ retry_pre_get: if (unlikely(ret != 0)) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); do { node = drm_mm_search_free(&man->manager, num_pages, mem->page_alignment, 1); @@ -619,7 +658,7 @@ retry_pre_get: if (likely(ret == 0)) put_count = ttm_bo_del_from_lru(entry); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); if (unlikely(ret != 0)) return ret; @@ -635,21 +674,21 @@ retry_pre_get: if (ret) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } while (1); if (!node) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return -ENOMEM; } node = drm_mm_get_block_atomic(node, num_pages, mem->page_alignment); if (unlikely(!node)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); goto retry_pre_get; } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); mem->mm_node = node; mem->mem_type = mem_type; return 0; @@ -697,6 +736,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, bool interruptible, bool no_wait) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_mem_type_manager *man; uint32_t num_prios = bdev->driver->num_mem_type_prio; @@ -733,20 +773,20 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (unlikely(ret)) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); node = drm_mm_search_free(&man->manager, mem->num_pages, mem->page_alignment, 1); if (unlikely(!node)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); break; } node = drm_mm_get_block_atomic(node, mem->num_pages, mem-> page_alignment); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } while (!node); } if (node) @@ -816,7 +856,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, uint32_t proposed_placement, bool interruptible, bool no_wait) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int ret = 0; struct ttm_mem_reg mem; @@ -852,9 +892,9 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, out_unlock: if (ret && mem.mm_node) { - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); drm_mm_put_block(mem.mm_node); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } return ret; } @@ -990,6 +1030,7 @@ int ttm_buffer_object_init(struct ttm_bo_device *bdev, INIT_LIST_HEAD(&bo->ddestroy); INIT_LIST_HEAD(&bo->swap); bo->bdev = 
bdev; + bo->glob = bdev->glob; bo->type = type; bo->num_pages = num_pages; bo->mem.mem_type = TTM_PL_SYSTEM; @@ -1002,6 +1043,7 @@ int ttm_buffer_object_init(struct ttm_bo_device *bdev, bo->seq_valid = false; bo->persistant_swap_storage = persistant_swap_storage; bo->acc_size = acc_size; + atomic_inc(&bo->glob->bo_count); ret = ttm_bo_check_placement(bo, flags, 0ULL); if (unlikely(ret != 0)) @@ -1040,13 +1082,13 @@ out_err: } EXPORT_SYMBOL(ttm_buffer_object_init); -static inline size_t ttm_bo_size(struct ttm_bo_device *bdev, +static inline size_t ttm_bo_size(struct ttm_bo_global *glob, unsigned long num_pages) { size_t page_array_size = (num_pages * sizeof(void *) + PAGE_SIZE - 1) & PAGE_MASK; - return bdev->ttm_bo_size + 2 * page_array_size; + return glob->ttm_bo_size + 2 * page_array_size; } int ttm_buffer_object_create(struct ttm_bo_device *bdev, @@ -1061,10 +1103,10 @@ int ttm_buffer_object_create(struct ttm_bo_device *bdev, { struct ttm_buffer_object *bo; int ret; - struct ttm_mem_global *mem_glob = bdev->mem_glob; + struct ttm_mem_global *mem_glob = bdev->glob->mem_glob; size_t acc_size = - ttm_bo_size(bdev, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); + ttm_bo_size(bdev->glob, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); if (unlikely(ret != 0)) return ret; @@ -1118,6 +1160,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, struct list_head *head, unsigned mem_type, bool allow_errors) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_buffer_object *entry; int ret; int put_count; @@ -1126,30 +1169,31 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, * Can't use standard list traversal since we're unlocking. */ - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); while (!list_empty(head)) { entry = list_first_entry(head, struct ttm_buffer_object, lru); kref_get(&entry->list_kref); ret = ttm_bo_reserve_locked(entry, false, false, false, 0); put_count = ttm_bo_del_from_lru(entry); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&entry->list_kref, ttm_bo_ref_bug); BUG_ON(ret); ret = ttm_bo_leave_list(entry, mem_type, allow_errors); ttm_bo_unreserve(entry); kref_put(&entry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return 0; } int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_mem_type_manager *man = &bdev->man[mem_type]; int ret = -EINVAL; @@ -1171,13 +1215,13 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) if (mem_type > 0) { ttm_bo_force_list_clean(bdev, &man->lru, mem_type, false); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (drm_mm_clean(&man->manager)) drm_mm_takedown(&man->manager); else ret = -EBUSY; - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } return ret; @@ -1251,11 +1295,83 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, } EXPORT_SYMBOL(ttm_bo_init_mm); +static void ttm_bo_global_kobj_release(struct kobject *kobj) +{ + struct ttm_bo_global *glob = + container_of(kobj, struct ttm_bo_global, kobj); + + printk(KERN_INFO TTM_PFX "Freeing bo global.\n"); + ttm_mem_unregister_shrink(glob->mem_glob, &glob->shrink); + __free_page(glob->dummy_read_page); + kfree(glob); +} + +void ttm_bo_global_release(struct ttm_global_reference *ref) +{ + struct ttm_bo_global *glob = ref->object; + + 
kobject_del(&glob->kobj); + kobject_put(&glob->kobj); +} +EXPORT_SYMBOL(ttm_bo_global_release); + +int ttm_bo_global_init(struct ttm_global_reference *ref) +{ + struct ttm_bo_global_ref *bo_ref = + container_of(ref, struct ttm_bo_global_ref, ref); + struct ttm_bo_global *glob = ref->object; + int ret; + + mutex_init(&glob->device_list_mutex); + spin_lock_init(&glob->lru_lock); + glob->mem_glob = bo_ref->mem_glob; + glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); + + if (unlikely(glob->dummy_read_page == NULL)) { + ret = -ENOMEM; + goto out_no_drp; + } + + INIT_LIST_HEAD(&glob->swap_lru); + INIT_LIST_HEAD(&glob->device_list); + + ttm_mem_init_shrink(&glob->shrink, ttm_bo_swapout); + ret = ttm_mem_register_shrink(glob->mem_glob, &glob->shrink); + if (unlikely(ret != 0)) { + printk(KERN_ERR TTM_PFX + "Could not register buffer object swapout.\n"); + goto out_no_shrink; + } + + glob->ttm_bo_extra_size = + ttm_round_pot(sizeof(struct ttm_tt)) + + ttm_round_pot(sizeof(struct ttm_backend)); + + glob->ttm_bo_size = glob->ttm_bo_extra_size + + ttm_round_pot(sizeof(struct ttm_buffer_object)); + + atomic_set(&glob->bo_count, 0); + + kobject_init(&glob->kobj, &ttm_bo_glob_kobj_type); + ret = kobject_add(&glob->kobj, ttm_get_kobj(), "buffer_objects"); + if (unlikely(ret != 0)) + kobject_put(&glob->kobj); + return ret; +out_no_shrink: + __free_page(glob->dummy_read_page); +out_no_drp: + kfree(glob); + return ret; +} +EXPORT_SYMBOL(ttm_bo_global_init); + + int ttm_bo_device_release(struct ttm_bo_device *bdev) { int ret = 0; unsigned i = TTM_NUM_MEM_TYPES; struct ttm_mem_type_manager *man; + struct ttm_bo_global *glob = bdev->glob; while (i--) { man = &bdev->man[i]; @@ -1271,98 +1387,74 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev) } } + mutex_lock(&glob->device_list_mutex); + list_del(&bdev->device_list); + mutex_unlock(&glob->device_list_mutex); + if (!cancel_delayed_work(&bdev->wq)) flush_scheduled_work(); while (ttm_bo_delayed_delete(bdev, true)) ; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (list_empty(&bdev->ddestroy)) TTM_DEBUG("Delayed destroy list was clean\n"); if (list_empty(&bdev->man[0].lru)) TTM_DEBUG("Swap list was clean\n"); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); - ttm_mem_unregister_shrink(bdev->mem_glob, &bdev->shrink); BUG_ON(!drm_mm_clean(&bdev->addr_space_mm)); write_lock(&bdev->vm_lock); drm_mm_takedown(&bdev->addr_space_mm); write_unlock(&bdev->vm_lock); - __free_page(bdev->dummy_read_page); return ret; } EXPORT_SYMBOL(ttm_bo_device_release); -/* - * This function is intended to be called on drm driver load. - * If you decide to call it from firstopen, you must protect the call - * from a potentially racing ttm_bo_driver_finish in lastclose. - * (This may happen on X server restart). - */ - int ttm_bo_device_init(struct ttm_bo_device *bdev, - struct ttm_mem_global *mem_glob, - struct ttm_bo_driver *driver, uint64_t file_page_offset) + struct ttm_bo_global *glob, + struct ttm_bo_driver *driver, + uint64_t file_page_offset) { int ret = -EINVAL; - bdev->dummy_read_page = NULL; rwlock_init(&bdev->vm_lock); - spin_lock_init(&bdev->lru_lock); + spin_lock_init(&glob->lru_lock); bdev->driver = driver; - bdev->mem_glob = mem_glob; memset(bdev->man, 0, sizeof(bdev->man)); - bdev->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); - if (unlikely(bdev->dummy_read_page == NULL)) { - ret = -ENOMEM; - goto out_err0; - } - /* * Initialize the system memory buffer type. * Other types need to be driver / IOCTL initialized. 
*/ ret = ttm_bo_init_mm(bdev, TTM_PL_SYSTEM, 0, 0); if (unlikely(ret != 0)) - goto out_err1; + goto out_no_sys; bdev->addr_space_rb = RB_ROOT; ret = drm_mm_init(&bdev->addr_space_mm, file_page_offset, 0x10000000); if (unlikely(ret != 0)) - goto out_err2; + goto out_no_addr_mm; INIT_DELAYED_WORK(&bdev->wq, ttm_bo_delayed_workqueue); bdev->nice_mode = true; INIT_LIST_HEAD(&bdev->ddestroy); - INIT_LIST_HEAD(&bdev->swap_lru); bdev->dev_mapping = NULL; - ttm_mem_init_shrink(&bdev->shrink, ttm_bo_swapout); - ret = ttm_mem_register_shrink(mem_glob, &bdev->shrink); - if (unlikely(ret != 0)) { - printk(KERN_ERR TTM_PFX - "Could not register buffer object swapout.\n"); - goto out_err2; - } + bdev->glob = glob; - bdev->ttm_bo_extra_size = - ttm_round_pot(sizeof(struct ttm_tt)) + - ttm_round_pot(sizeof(struct ttm_backend)); - - bdev->ttm_bo_size = bdev->ttm_bo_extra_size + - ttm_round_pot(sizeof(struct ttm_buffer_object)); + mutex_lock(&glob->device_list_mutex); + list_add_tail(&bdev->device_list, &glob->device_list); + mutex_unlock(&glob->device_list_mutex); return 0; -out_err2: +out_no_addr_mm: ttm_bo_clean_mm(bdev, 0); -out_err1: - __free_page(bdev->dummy_read_page); -out_err0: +out_no_sys: return ret; } EXPORT_SYMBOL(ttm_bo_device_init); @@ -1607,21 +1699,21 @@ void ttm_bo_synccpu_write_release(struct ttm_buffer_object *bo) static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) { - struct ttm_bo_device *bdev = - container_of(shrink, struct ttm_bo_device, shrink); + struct ttm_bo_global *glob = + container_of(shrink, struct ttm_bo_global, shrink); struct ttm_buffer_object *bo; int ret = -EBUSY; int put_count; uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); while (ret == -EBUSY) { - if (unlikely(list_empty(&bdev->swap_lru))) { - spin_unlock(&bdev->lru_lock); + if (unlikely(list_empty(&glob->swap_lru))) { + spin_unlock(&glob->lru_lock); return -EBUSY; } - bo = list_first_entry(&bdev->swap_lru, + bo = list_first_entry(&glob->swap_lru, struct ttm_buffer_object, swap); kref_get(&bo->list_kref); @@ -1633,16 +1725,16 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) ret = ttm_bo_reserve_locked(bo, false, true, false, 0); if (unlikely(ret == -EBUSY)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ttm_bo_wait_unreserved(bo, false); kref_put(&bo->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } } BUG_ON(ret != 0); put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&bo->list_kref, ttm_bo_ref_bug); @@ -1696,6 +1788,6 @@ out: void ttm_bo_swapout_all(struct ttm_bo_device *bdev) { - while (ttm_bo_swapout(&bdev->shrink) == 0) + while (ttm_bo_swapout(&bdev->glob->shrink) == 0) ; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index bdec583901eb..12cd47aa18ce 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -41,9 +41,9 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo) struct ttm_mem_reg *old_mem = &bo->mem; if (old_mem->mm_node) { - spin_lock(&bo->bdev->lru_lock); + spin_lock(&bo->glob->lru_lock); drm_mm_put_block(old_mem->mm_node); - spin_unlock(&bo->bdev->lru_lock); + spin_unlock(&bo->glob->lru_lock); } old_mem->mm_node = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 4e1e2566d519..b0f73096d372 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ 
b/drivers/gpu/drm/ttm/ttm_tt.c @@ -166,7 +166,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) set_page_dirty_lock(page); ttm->pages[i] = NULL; - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE); + ttm_mem_global_free(ttm->glob->mem_glob, PAGE_SIZE); put_page(page); } ttm->state = tt_unpopulated; @@ -177,8 +177,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index) { struct page *p; - struct ttm_bo_device *bdev = ttm->bdev; - struct ttm_mem_global *mem_glob = bdev->mem_glob; + struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; int ret; while (NULL == (p = ttm->pages[index])) { @@ -348,7 +347,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) printk(KERN_ERR TTM_PFX "Erroneous page count. " "Leaking pages.\n"); - ttm_mem_global_free_page(ttm->bdev->mem_glob, + ttm_mem_global_free_page(ttm->glob->mem_glob, cur_page); __free_page(cur_page); } @@ -394,7 +393,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, struct mm_struct *mm = tsk->mm; int ret; int write = (ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0; - struct ttm_mem_global *mem_glob = ttm->bdev->mem_glob; + struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; BUG_ON(num_pages != ttm->num_pages); BUG_ON((ttm->page_flags & TTM_PAGE_FLAG_USER) == 0); @@ -439,8 +438,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size, if (!ttm) return NULL; - ttm->bdev = bdev; - + ttm->glob = bdev->glob; ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ttm->first_himem_page = ttm->num_pages; ttm->last_lomem_page = -1; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 99dc521aa1a9..491146170522 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -155,6 +155,7 @@ struct ttm_buffer_object { * Members constant at init. */ + struct ttm_bo_global *glob; struct ttm_bo_device *bdev; unsigned long buffer_start; enum ttm_bo_type type; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 62ed733c52a2..9dc32f70b9a2 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -32,6 +32,7 @@ #include "ttm/ttm_bo_api.h" #include "ttm/ttm_memory.h" +#include "ttm/ttm_module.h" #include "drm_mm.h" #include "linux/workqueue.h" #include "linux/fs.h" @@ -160,7 +161,7 @@ struct ttm_tt { long last_lomem_page; uint32_t page_flags; unsigned long num_pages; - struct ttm_bo_device *bdev; + struct ttm_bo_global *glob; struct ttm_backend *be; struct task_struct *tsk; unsigned long start; @@ -355,24 +356,73 @@ struct ttm_bo_driver { void *(*sync_obj_ref) (void *sync_obj); }; -#define TTM_NUM_MEM_TYPES 8 +/** + * struct ttm_bo_global_ref - Argument to initialize a struct ttm_bo_global. + */ + +struct ttm_bo_global_ref { + struct ttm_global_reference ref; + struct ttm_mem_global *mem_glob; +}; -#define TTM_BO_PRIV_FLAG_MOVING 0 /* Buffer object is moving and needs - idling before CPU mapping */ -#define TTM_BO_PRIV_FLAG_MAX 1 /** - * struct ttm_bo_device - Buffer object driver device-specific data. + * struct ttm_bo_global - Buffer object driver global data. * * @mem_glob: Pointer to a struct ttm_mem_global object for accounting. - * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. - * @count: Current number of buffer object. - * @pages: Current number of pinned pages. * @dummy_read_page: Pointer to a dummy page used for mapping requests * of unpopulated pages. 
- * @shrink: A shrink callback object used for buffre object swap. + * @shrink: A shrink callback object used for buffer object swap. * @ttm_bo_extra_size: Extra size (sizeof(struct ttm_buffer_object) excluded) * used by a buffer object. This is excluding page arrays and backing pages. * @ttm_bo_size: This is @ttm_bo_extra_size + sizeof(struct ttm_buffer_object). + * @device_list_mutex: Mutex protecting the device list. + * This mutex is held while traversing the device list for pm options. + * @lru_lock: Spinlock protecting the bo subsystem lru lists. + * @device_list: List of buffer object devices. + * @swap_lru: Lru list of buffer objects used for swapping. + */ + +struct ttm_bo_global { + + /** + * Constant after init. + */ + + struct kobject kobj; + struct ttm_mem_global *mem_glob; + struct page *dummy_read_page; + struct ttm_mem_shrink shrink; + size_t ttm_bo_extra_size; + size_t ttm_bo_size; + struct mutex device_list_mutex; + spinlock_t lru_lock; + + /** + * Protected by device_list_mutex. + */ + struct list_head device_list; + + /** + * Protected by the lru_lock. + */ + struct list_head swap_lru; + + /** + * Internal protection. + */ + atomic_t bo_count; +}; + + +#define TTM_NUM_MEM_TYPES 8 + +#define TTM_BO_PRIV_FLAG_MOVING 0 /* Buffer object is moving and needs + idling before CPU mapping */ +#define TTM_BO_PRIV_FLAG_MAX 1 +/** + * struct ttm_bo_device - Buffer object driver device-specific data. + * + * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. * @addr_space_mm: Range manager for the device address space. * lru_lock: Spinlock that protects the buffer+device lru lists and @@ -390,32 +440,21 @@ struct ttm_bo_device { /* * Constant after bo device init / atomic. */ - - struct ttm_mem_global *mem_glob; + struct list_head device_list; + struct ttm_bo_global *glob; struct ttm_bo_driver *driver; - struct page *dummy_read_page; - struct ttm_mem_shrink shrink; - - size_t ttm_bo_extra_size; - size_t ttm_bo_size; - rwlock_t vm_lock; + struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; /* * Protected by the vm lock. */ - struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; struct rb_root addr_space_rb; struct drm_mm addr_space_mm; /* - * Might want to change this to one lock per manager. - */ - spinlock_t lru_lock; - /* - * Protected by the lru lock. + * Protected by the global:lru lock. */ struct list_head ddestroy; - struct list_head swap_lru; /* * Protected by load / firstopen / lastclose /unload sync. @@ -629,6 +668,9 @@ extern int ttm_bo_pci_offset(struct ttm_bo_device *bdev, unsigned long *bus_offset, unsigned long *bus_size); +extern void ttm_bo_global_release(struct ttm_global_reference *ref); +extern int ttm_bo_global_init(struct ttm_global_reference *ref); + extern int ttm_bo_device_release(struct ttm_bo_device *bdev); /** @@ -646,7 +688,7 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev); * !0: Failure. */ extern int ttm_bo_device_init(struct ttm_bo_device *bdev, - struct ttm_mem_global *mem_glob, + struct ttm_bo_global *glob, struct ttm_bo_driver *driver, uint64_t file_page_offset); -- cgit v1.2.3 From 4516fc0454e7ffe2f369e80045b23c2b32155004 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:54 -0400 Subject: sunrpc: add routine for comparing addresses lockd needs these sorts of routines, as does the NFSv4 callback code. Move lockd's routines into common code and rename them so that they can be used by others. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- fs/lockd/clntlock.c | 2 +- fs/lockd/host.c | 4 ++-- fs/lockd/mon.c | 2 +- fs/lockd/svcsubs.c | 2 +- include/linux/lockd/lockd.h | 43 ---------------------------------------- include/linux/sunrpc/clnt.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 53 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 1f3b0fc0d351..fc9032dc8862 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) + if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7cb076ac6b45..4600c2037b8b 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) */ chain = &nlm_hosts[nlm_hash_address(ni->sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) + if (!rpc_cmp_addr(nlm_addr(host), ni->sap)) continue; /* See if we have an NSM handle for this client */ @@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) if (host->h_server != ni->server) continue; if (ni->server && - !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) + !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) continue; /* Move to head of hash chain. */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 30c933188dd7..f956651d0f65 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) struct nsm_handle *nsm; list_for_each_entry(nsm, &nsm_handles, sm_link) - if (nlm_cmp_addr(nsm_addr(nsm), sap)) + if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 9e4d6aab611b..ad478da7ca63 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - return nlm_cmp_addr(nlm_srcaddr(host), datap); + return rpc_cmp_addr(nlm_srcaddr(host), datap); } /** diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c325b187966b..e7a251a988c0 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) } } -static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); -} -#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return 0; -} -#endif /* !(CONFIG_IPV6 || 
CONFIG_IPV6_MODULE) */ - -/* - * Compare two host addresses - * - * Return TRUE if the addresses are the same; otherwise FALSE. - */ -static inline int nlm_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __nlm_cmp_addr4(sap1, sap2); - case AF_INET6: - return __nlm_cmp_addr6(sap1, sap2); - } - } - return 0; -} - /* * Compare two NLM locks. * When the second lock is of type F_UNLCK, this acts like a wildcard. diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab3f6e90caa5..b17df361be82 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -22,6 +22,7 @@ #include #include #include +#include struct rpc_inode; @@ -188,5 +189,52 @@ static inline void rpc_set_port(struct sockaddr *sap, #define IPV6_SCOPE_DELIMITER '%' #define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); +} +#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} +#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, scope, etc. + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From be3ad6b0b675fd1d6b48362ca30bdee75fbef6b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:55 -0400 Subject: sunrpc: add common routine for copying address portion of a sockaddr Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/clnt.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b17df361be82..044f531aee70 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -198,6 +198,17 @@ static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) @@ -206,12 +217,29 @@ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr); + return true; +} #else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { return false; } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} #endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ /** @@ -236,5 +264,27 @@ static inline bool rpc_cmp_addr(const struct sockaddr *sap1, return false; } +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From 363168b4ea8ec26aeb982ac6024a09f907ecd27e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:56 -0400 Subject: nfsd: make nfs4_client->cl_addr a struct sockaddr_storage It's currently a __be32, which isn't big enough to hold an IPv6 address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 32 +++++++++++++++++++------------- include/linux/nfsd/state.h | 2 +- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9295c4b56bce..bfc14d879ea1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -55,6 +55,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -1220,13 +1221,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, int status; unsigned int strhashval; char dname[HEXDIR_LEN]; + char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); + rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - " ip_addr=%u flags %x, spa_how %d\n", + "ip_addr=%s flags %x, spa_how %d\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, - ip_addr, exid->flags, exid->spa_how); + addr_str, exid->flags, exid->spa_how); if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) return nfserr_inval; @@ -1315,7 +1318,7 @@ out_new: copy_verf(new, &verf); copy_cred(&new->cl_cred, &rqstp->rq_cred); - new->cl_addr = ip_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); gen_clid(new); gen_confirm(new); add_to_unconfirmed(new, strhashval); @@ -1389,7 +1392,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; @@ -1417,7 +1420,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || - (ip_addr != unconf->cl_addr)) { + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; goto out; } @@ -1564,7 +1567,7 @@ __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct xdr_netobj clname = { .len = setclid->se_namelen, .data = setclid->se_name, @@ -1596,8 +1599,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - dprintk("NFSD: setclientid: string in use by client" - " at %pI4\n", &conf->cl_addr); + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, + sizeof(addr_str)); + dprintk("NFSD: setclientid: string in use by client " + "at %s\n", addr_str); goto out; } } @@ -1659,7 +1665,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, gen_clid(new); } copy_verf(new, &clverifier); - new->cl_addr = sin->sin_addr.s_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); new->cl_flavor = rqstp->rq_flavor; princ = svc_gss_principal(rqstp); if (princ) { @@ -1693,7 +1699,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = 
&setclientid_confirm->sc_clientid; @@ -1712,9 +1718,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, unconf = find_unconfirmed_client(clid); status = nfserr_clid_inuse; - if (conf && conf->cl_addr != sin->sin_addr.s_addr) + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) goto out; - if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) goto out; /* diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 58bb19784e12..3510ddd4be49 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -200,7 +200,7 @@ struct nfs4_client { char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ - __be32 cl_addr; /* client ipaddress */ + struct sockaddr_storage cl_addr; /* client ipaddress */ u32 cl_flavor; /* setclientid pseudoflavor */ char *cl_principal; /* setclientid principal name */ struct svc_cred cl_cred; /* setclientid principal */ -- cgit v1.2.3 From aa9a4ec7707a5391cde556f3fa1b0eb4bca3bcf6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:57 -0400 Subject: nfsd: convert nfs4_cb_conn struct to hold address in sockaddr_storage ...rather than as a separate address and port fields. This will be necessary for implementing callbacks over IPv6. Also, convert gen_callback to use the standard rpcuaddr2sockaddr routine rather than its own private one. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 11 ++----- fs/nfsd/nfs4state.c | 81 ++++++---------------------------------------- include/linux/nfsd/state.h | 4 +-- 3 files changed, 13 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3fd23f7aceca..81d1c5285dcc 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -377,7 +377,6 @@ static int max_cb_time(void) int setup_callback_client(struct nfs4_client *clp) { - struct sockaddr_in addr; struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { .to_initval = max_cb_time(), @@ -385,8 +384,8 @@ int setup_callback_client(struct nfs4_client *clp) }; struct rpc_create_args args = { .protocol = IPPROTO_TCP, - .address = (struct sockaddr *)&addr, - .addrsize = sizeof(addr), + .address = (struct sockaddr *) &cb->cb_addr, + .addrsize = cb->cb_addrlen, .timeout = &timeparms, .program = &cb_program, .prognumber = cb->cb_prog, @@ -400,12 +399,6 @@ int setup_callback_client(struct nfs4_client *clp) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - /* Initialize address */ - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(cb->cb_port); - addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bfc14d879ea1..96a742308cee 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -897,76 +897,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, return NULL; } -/* a helper function for parse_callback */ -static int -parse_octet(unsigned int *lenp, char **addrp) -{ - unsigned int len = *lenp; - char *p = *addrp; - int n = -1; - char c; - - for (;;) { - if (!len) - break; - len--; - c = *p++; - if (c == '.') - break; - if ((c < '0') || (c > '9')) { - n = -1; - break; - } - if (n < 0) - n = 0; - n = (n 
* 10) + (c - '0'); - if (n > 255) { - n = -1; - break; - } - } - *lenp = len; - *addrp = p; - return n; -} - -/* parse and set the setclientid ipv4 callback address */ -static int -parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) -{ - int temp = 0; - u32 cbaddr = 0; - u16 cbport = 0; - u32 addrlen = addr_len; - char *addr = addr_val; - int i, shift; - - /* ipaddress */ - shift = 24; - for(i = 4; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbaddr |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbaddrp = cbaddr; - - /* port */ - shift = 8; - for(i = 2; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbport |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbportp = cbport; - return 1; -} - static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { @@ -976,14 +906,21 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) goto out_err; - if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, - &cb->cb_addr, &cb->cb_port))) + cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + se->se_callback_addr_len, + (struct sockaddr *) &cb->cb_addr, + sizeof(cb->cb_addr)); + + if (!cb->cb_addrlen || cb->cb_addr.ss_family != AF_INET) goto out_err; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; return; out_err: + cb->cb_addr.ss_family = AF_UNSPEC; + cb->cb_addrlen = 0; dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 3510ddd4be49..fb0c404c7c5c 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -81,8 +81,8 @@ struct nfs4_delegation { /* client delegation callback info */ struct nfs4_cb_conn { /* SETCLIENTID info */ - u32 cb_addr; - unsigned short cb_port; + struct sockaddr_storage cb_addr; + size_t cb_addrlen; u32 cb_prog; u32 cb_minorversion; u32 cb_ident; /* minorversion 0 only */ -- cgit v1.2.3 From fbf4665f41b02e757ab9d9198df65e319388e728 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:59 -0400 Subject: nfsd: populate sin6_scope_id on callback address with scopeid from rq_addr on SETCLIENTID call When a SETCLIENTID call comes in, one of the args given is the svc_rqst. This struct contains an rq_addr field which holds the address that sent the call. If this is an IPv6 address, then we can use the sin6_scope_id field in this address to populate the sin6_scope_id field in the callback address. AFAICT, the rq_addr.sin6_scope_id is non-zero if and only if the client mounted the server's link-local address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- include/linux/sunrpc/clnt.h | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ec0ca1ef4ea..d2a052480908 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -898,7 +898,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, } static void -gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) { struct nfs4_cb_conn *cb = &clp->cl_cb_conn; unsigned short expected_family; @@ -921,6 +921,9 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) goto out_err; + if (cb->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; @@ -1621,7 +1624,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } copy_cred(&new->cl_cred, &rqstp->rq_cred); gen_confirm(new); - gen_callback(new, setclid); + gen_callback(new, setclid, rpc_get_scope_id(sa)); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 044f531aee70..3d025588e56e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -286,5 +286,20 @@ static inline bool rpc_copy_addr(struct sockaddr *dst, return false; } +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From da15cfdae03351c689736f8d142618592e3cebc3 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 19 Aug 2009 19:13:34 -0700 Subject: time: Introduce CLOCK_REALTIME_COARSE After talking with some application writers who want very fast, but not fine-grained timestamps, I decided to try to implement new clock_ids to clock_gettime(): CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE which returns the time at the last tick. This is very fast as we don't have to access any hardware (which can be very painful if you're using something like the acpi_pm clocksource), and we can even use the vdso clock_gettime() method to avoid the syscall. The only trade off is you only get low-res tick grained time resolution. This isn't a new idea, I know Ingo has a patch in the -rt tree that made the vsyscall gettimeofday() return coarse grained time when the vsyscall64 sysctrl was set to 2. However this affects all applications on a system. With this method, applications can choose the proper speed/granularity trade-off for themselves. 
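For illustration only (not part of the patch): a minimal userspace sketch of the intended usage. It defines the constants by hand since libc headers of the day may not carry them yet (the values 5 and 6 come from the time.h hunk below), and on older glibc clock_gettime() needs -lrt at link time.

#include <stdio.h>
#include <time.h>

#ifndef CLOCK_REALTIME_COARSE
#define CLOCK_REALTIME_COARSE 5		/* value from this patch */
#endif
#ifndef CLOCK_MONOTONIC_COARSE
#define CLOCK_MONOTONIC_COARSE 6	/* value from this patch */
#endif

int main(void)
{
	struct timespec fine, coarse, res;

	clock_gettime(CLOCK_REALTIME, &fine);		/* reads the clocksource */
	clock_gettime(CLOCK_REALTIME_COARSE, &coarse);	/* time at the last tick */
	clock_getres(CLOCK_REALTIME_COARSE, &res);	/* granularity: one tick */

	printf("fine:   %ld.%09ld\n", (long)fine.tv_sec, fine.tv_nsec);
	printf("coarse: %ld.%09ld (res %ld ns)\n",
	       (long)coarse.tv_sec, coarse.tv_nsec, res.tv_nsec);
	return 0;
}

The coarse reads never touch clock hardware, which is the whole point of the new clock IDs.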
Signed-off-by: John Stultz Cc: Andi Kleen Cc: nikolag@ca.ibm.com Cc: Darren Hart Cc: arjan@infradead.org Cc: jonathan@jonmasters.org LKML-Reference: <1250734414.6897.5.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/vgtod.h | 1 + arch/x86/kernel/vsyscall_64.c | 1 + arch/x86/vdso/vclock_gettime.c | 39 ++++++++++++++++++++++++++++++++++++--- include/linux/time.h | 4 ++++ kernel/posix-timers.c | 35 +++++++++++++++++++++++++++++++++++ kernel/time/timekeeping.c | 21 +++++++++++++++++++++ 6 files changed, 98 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69e5d2a..3d61e204826f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -21,6 +21,7 @@ struct vsyscall_gtod_data { u32 shift; } clock; struct timespec wall_to_monotonic; + struct timespec wall_time_coarse; }; extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a80aad..cf53a78e2dcf 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78b46aa..ee55754cc3c5 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts) return 0; } +notrace static noinline int do_realtime_coarse(struct timespec *ts) +{ + unsigned long seq; + do { + seq = read_seqbegin(>od->lock); + ts->tv_sec = gtod->wall_time_coarse.tv_sec; + ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; + } while (unlikely(read_seqretry(>od->lock, seq))); + return 0; +} + +notrace static noinline int do_monotonic_coarse(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(>od->lock); + secs = gtod->wall_time_coarse.tv_sec; + ns = gtod->wall_time_coarse.tv_nsec; + secs += gtod->wall_to_monotonic.tv_sec; + ns += gtod->wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(>od->lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) + if (likely(gtod->sysctl_enabled)) switch (clock) { case CLOCK_REALTIME: - return do_realtime(ts); + if (likely(gtod->clock.vread)) + return do_realtime(ts); + break; case CLOCK_MONOTONIC: - return do_monotonic(ts); + if (likely(gtod->clock.vread)) + return do_monotonic(ts); + break; + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(ts); } return vdso_fallback_gettime(clock, ts); } diff --git a/include/linux/time.h b/include/linux/time.h index f505988398e6..256232f7e5e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -110,6 +110,8 @@ extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); +struct timespec __current_kernel_time(void); /* does not hold xtime_lock */ +struct timespec 
get_monotonic_coarse(void); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) @@ -243,6 +245,8 @@ struct itimerval { #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 #define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 /* * The IDs of various hardware clocks: diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index d089d052c4a9..495440779ce3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -242,6 +242,25 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) return 0; } + +static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp) +{ + *tp = current_kernel_time(); + return 0; +} + +static int posix_get_monotonic_coarse(clockid_t which_clock, + struct timespec *tp) +{ + *tp = get_monotonic_coarse(); + return 0; +} + +int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) +{ + *tp = ktime_to_timespec(KTIME_LOW_RES); + return 0; +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ @@ -262,10 +281,26 @@ static __init int init_posix_timers(void) .timer_create = no_timer_create, .nsleep = no_nsleep, }; + struct k_clock clock_realtime_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_realtime_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; + struct k_clock clock_monotonic_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_monotonic_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); + register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); + register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 15e06defca55..03cbeb34d141 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -847,6 +847,10 @@ unsigned long get_seconds(void) } EXPORT_SYMBOL(get_seconds); +struct timespec __current_kernel_time(void) +{ + return xtime_cache; +} struct timespec current_kernel_time(void) { @@ -862,3 +866,20 @@ struct timespec current_kernel_time(void) return now; } EXPORT_SYMBOL(current_kernel_time); + +struct timespec get_monotonic_coarse(void) +{ + struct timespec now, mono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + + now = xtime_cache; + mono = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, + now.tv_nsec + mono.tv_nsec); + return now; +} -- cgit v1.2.3 From 05ecd5a1f76c183cca381705b3adb7d77c9a0439 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Mon, 24 Aug 2009 19:52:38 +0900 Subject: sh: Simplify "multi-evt" interrupt handling. This patch changes the way in which "multi-evt" interrupts are handled. The intc_evt2irq_table and related intc_evt2irq() have been removed and the "redirecting" handler is installed for the coupled interrupts. Thanks to that, the do_IRQ() function doesn't have to use another level of indirection for all the interrupts...
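Condensed from the intc.c hunks below, the redirect pattern as a standalone sketch (helper names here are illustrative; it assumes the set_irq_data()/get_irq_data() genirq API of this era):

#include <linux/irq.h>

/* Secondary vectors that share an enum_id no longer go through a
 * translation table; each gets a trivial flow handler that forwards
 * to the group's primary irq. */
static void redirect_irq(unsigned int irq, struct irq_desc *desc)
{
	/* the primary irq number was stashed via set_irq_data() */
	generic_handle_irq((unsigned int)get_irq_data(irq));
}

static void couple_vector(struct irq_chip *chip,
			  unsigned int secondary, unsigned int primary)
{
	set_irq_chip_and_handler_name(secondary, chip,
				      redirect_irq, "redirect");
	/* sh is 32-bit, so storing the irq number in the pointer is safe */
	set_irq_data(secondary, (void *)primary);
}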
Signed-off-by: Pawel Moll Signed-off-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/kernel/irq.c | 2 +- drivers/sh/intc.c | 54 ++++++++++++++++--------------------------------- include/linux/sh_intc.h | 1 - 3 files changed, 18 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 278c68c60488..d1053392e287 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -114,7 +114,7 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs) #endif irq_enter(); - irq = irq_demux(intc_evt2irq(irq)); + irq = irq_demux(evt2irq(irq)); #ifdef CONFIG_IRQSTACKS curctx = (union irq_ctx *)current_thread_info(); diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c index 4b1ca9d28353..a9174ec72853 100644 --- a/drivers/sh/intc.c +++ b/drivers/sh/intc.c @@ -663,16 +663,9 @@ static unsigned int __init save_reg(struct intc_desc_int *d, return 0; } -static unsigned char *intc_evt2irq_table; - -unsigned int intc_evt2irq(unsigned int vector) +static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc) { - unsigned int irq = evt2irq(vector); - - if (intc_evt2irq_table && intc_evt2irq_table[irq]) - irq = intc_evt2irq_table[irq]; - - return irq; + generic_handle_irq((unsigned int)get_irq_data(irq)); } void __init register_intc_controller(struct intc_desc *desc) @@ -745,34 +738,6 @@ void __init register_intc_controller(struct intc_desc *desc) BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */ - /* keep the first vector only if same enum is used multiple times */ - for (i = 0; i < desc->nr_vectors; i++) { - struct intc_vect *vect = desc->vectors + i; - int first_irq = evt2irq(vect->vect); - - if (!vect->enum_id) - continue; - - for (k = i + 1; k < desc->nr_vectors; k++) { - struct intc_vect *vect2 = desc->vectors + k; - - if (vect->enum_id != vect2->enum_id) - continue; - - vect2->enum_id = 0; - - if (!intc_evt2irq_table) - intc_evt2irq_table = kzalloc(NR_IRQS, GFP_NOWAIT); - - if (!intc_evt2irq_table) { - pr_warning("intc: cannot allocate evt2irq!\n"); - continue; - } - - intc_evt2irq_table[evt2irq(vect2->vect)] = first_irq; - } - } - /* register the vectors one by one */ for (i = 0; i < desc->nr_vectors; i++) { struct intc_vect *vect = desc->vectors + i; @@ -789,6 +754,21 @@ void __init register_intc_controller(struct intc_desc *desc) } intc_register_irq(desc, d, vect->enum_id, irq); + + for (k = i + 1; k < desc->nr_vectors; k++) { + struct intc_vect *vect2 = desc->vectors + k; + unsigned int irq2 = evt2irq(vect2->vect); + + if (vect->enum_id != vect2->enum_id) + continue; + + vect2->enum_id = 0; + + /* redirect this interrupts to the first one */ + set_irq_chip_and_handler_name(irq2, &d->chip, + intc_redirect_irq, "redirect"); + set_irq_data(irq2, (void *)irq); + } } } diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index eb1423a0078d..68e212ff9dde 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -85,7 +85,6 @@ struct intc_desc symbol __initdata = { \ } #endif -unsigned int intc_evt2irq(unsigned int vector); void __init register_intc_controller(struct intc_desc *desc); int intc_set_priority(unsigned int irq, unsigned int prio); -- cgit v1.2.3 From 0396c215f301e92677d1e9a064b405e31501dc1d Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 25 Aug 2009 16:41:06 +0100 Subject: uwb: avoid radio controller reset loops If a radio controller reset attempt occurs while a probe() or remove() is in progress it fails and is retried endlessly, potentially preventing the probe() or 
remove() from completing. If a reset fails, sleep for a bit before retrying the reset. This allows the probe()/remove() to complete. Signed-off-by: David Vrabel --- drivers/uwb/hwa-rc.c | 3 +-- drivers/uwb/reset.c | 21 +++++++++++---------- drivers/uwb/umc-bus.c | 2 +- drivers/uwb/whc-rc.c | 3 +-- include/linux/uwb.h | 2 +- 5 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 9052bcb4f528..e7eeb63fab23 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -887,8 +887,7 @@ static int hwarc_post_reset(struct usb_interface *iface) struct hwarc *hwarc = usb_get_intfdata(iface); struct uwb_rc *uwb_rc = hwarc->uwb_rc; - uwb_rc_post_reset(uwb_rc); - return 0; + return uwb_rc_post_reset(uwb_rc); } /** USB device ID's that we handle */ diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c index 70f8050221ff..7f0512e43d9d 100644 --- a/drivers/uwb/reset.c +++ b/drivers/uwb/reset.c @@ -30,6 +30,7 @@ */ #include #include +#include #include "uwb-internal.h" @@ -323,13 +324,15 @@ int uwbd_msg_handle_reset(struct uwb_event *evt) dev_info(&rc->uwb_dev.dev, "resetting radio controller\n"); ret = rc->reset(rc); - if (ret) { + if (ret < 0) { dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret); goto error; } return 0; error: - /* Nothing can be done except try the reset again. */ + /* Nothing can be done except try the reset again. Wait a bit + to avoid reset loops during probe() or remove(). */ + msleep(1000); uwb_rc_reset_all(rc); return ret; } @@ -368,22 +371,20 @@ void uwb_rc_pre_reset(struct uwb_rc *rc) } EXPORT_SYMBOL_GPL(uwb_rc_pre_reset); -void uwb_rc_post_reset(struct uwb_rc *rc) +int uwb_rc_post_reset(struct uwb_rc *rc) { int ret; ret = rc->start(rc); if (ret) - goto error; + goto out; ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr); if (ret) - goto error; + goto out; ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr); if (ret) - goto error; - return; -error: - /* Nothing can be done except try the reset again. 
*/ - uwb_rc_reset_all(rc); + goto out; +out: + return ret; } EXPORT_SYMBOL_GPL(uwb_rc_post_reset); diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index 5ad36164c13b..cdd6c8efc9f8 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -66,7 +66,7 @@ int umc_controller_reset(struct umc_dev *umc) return -EAGAIN; ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper); if (ret >= 0) - device_for_each_child(parent, parent, umc_bus_post_reset_helper); + ret = device_for_each_child(parent, parent, umc_bus_post_reset_helper); up(&parent->sem); return ret; diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c index 19a1dd129212..1d9a6f54658e 100644 --- a/drivers/uwb/whc-rc.c +++ b/drivers/uwb/whc-rc.c @@ -443,8 +443,7 @@ static int whcrc_post_reset(struct umc_dev *umc) struct whcrc *whcrc = umc_get_drvdata(umc); struct uwb_rc *uwb_rc = whcrc->uwb_rc; - uwb_rc_post_reset(uwb_rc); - return 0; + return uwb_rc_post_reset(uwb_rc); } /* PCI device ID's that we handle [so it gets loaded] */ diff --git a/include/linux/uwb.h b/include/linux/uwb.h index c02128991ff7..7fc9746f22cd 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -597,7 +597,7 @@ void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t); void uwb_rc_neh_error(struct uwb_rc *, int); void uwb_rc_reset_all(struct uwb_rc *rc); void uwb_rc_pre_reset(struct uwb_rc *rc); -void uwb_rc_post_reset(struct uwb_rc *rc); +int uwb_rc_post_reset(struct uwb_rc *rc); /** * uwb_rsv_is_owner - is the owner of this reservation the RC? -- cgit v1.2.3 From 9e36fda0b359d2a6ae039c3d7e71a04502a77898 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:35 -0700 Subject: x86, pat: Add PAT reserve free to io_mapping* APIs io_mapping_* interfaces were added, mainly for graphics drivers. Make this interface go through the PAT reserve/free, instead of hardcoding WC mapping. This makes sure that there are no aliases due to unconditional WC setting. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/iomap.h | 9 ++++++--- arch/x86/mm/iomap_32.c | 27 +++++++++++++++++++++++++-- include/linux/io-mapping.h | 17 ++++++++++++----- 3 files changed, 43 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 0e9fe1d9d971..f35eb45d6576 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -26,13 +26,16 @@ #include #include -int -is_io_mapping_possible(resource_size_t base, unsigned long size); - void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void iounmap_atomic(void *kvaddr, enum km_type type); +int +iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); + +void +iomap_free(resource_size_t base, unsigned long size); + #endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,7 +21,7 @@ #include #include -int is_io_mapping_possible(resource_size_t base, unsigned long size) +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) /* There is no way to map greater than 1 << 32 address without PAE */ @@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) #endif return 1; } -EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) +{ + unsigned long flag = _PAGE_CACHE_WC; + int ret; + + if (!is_io_mapping_possible(base, size)) + return -EINVAL; + + ret = io_reserve_memtype(base, base + size, &flag); + if (ret) + return ret; + + *prot = __pgprot(__PAGE_KERNEL | flag); + return 0; +} +EXPORT_SYMBOL_GPL(iomap_create_wc); + +void +iomap_free(resource_size_t base, unsigned long size) +{ + io_free_memtype(base, base + size); +} +EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0adb0f91568c..97eb928b4924 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,23 +49,30 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; - - if (!is_io_mapping_possible(base, size)) - return NULL; + pgprot_t prot; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); if (!iomap) - return NULL; + goto out_err; + + if (iomap_create_wc(base, size, &prot)) + goto out_free; iomap->base = base; iomap->size = size; - iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + iomap->prot = prot; return iomap; + +out_free: + kfree(iomap); +out_err: + return NULL; } static inline void io_mapping_free(struct io_mapping *mapping) { + iomap_free(mapping->base, mapping->size); kfree(mapping); } -- cgit v1.2.3 From 46cf98cdaef5471926010b5bddf84c44ec177fdd Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:37 -0700 Subject: x86, pat: Generalize the use of page flag PG_uncached Only IA64 was using PG_uncached as of now. We now intend to use this bit in x86 as well, to keep track of memory type of those addresses that have page struct for them. So, generalize the use of that bit across ia64 and x86. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/ia64/Kconfig | 4 ++++ arch/x86/Kconfig | 4 ++++ include/linux/page-flags.h | 4 ++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..e6246119932a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -112,6 +112,10 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR +config ARCH_USES_PG_UNCACHED + def_bool y + depends on IA64_UNCACHED_ALLOCATOR + config AUDIT_ARCH bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f72205909..8e1595382196 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1414,6 +1414,10 @@ config X86_PAT If unsure, say Y. +config ARCH_USES_PG_UNCACHED + def_bool y + depends on X86_PAT + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e2e5ce543595..2b87acfc5f87 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -99,7 +99,7 @@ enum pageflags { #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT PG_mlocked, /* Page is vma mlocked */ #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif __NR_PAGEFLAGS, @@ -257,7 +257,7 @@ PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached) #else PAGEFLAG_FALSE(Uncached) -- cgit v1.2.3 From c9c97b8c75019814d8c007059bc827bb475be917 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Aug 2009 09:53:47 +1000 Subject: drm/ttm: consolidate cache flushing code in one place. This merges the TTM and drm cache flushing into one file in the drm core. 
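From a driver's point of view the consolidation is a one-call interface; a hedged sketch (the wrapper below is hypothetical, only drm_clflush_pages() comes from this patch):

#include <linux/mm_types.h>
#include "drm_cache.h"

/* CPU-written pages are flushed before a non-coherent GPU reads them.
 * The shared helper picks clflush, a wbinvd IPI, or a powerpc dcache
 * flush as appropriate, so drivers (and TTM) no longer carry private
 * copies of that logic. */
static void example_prepare_pages_for_gpu(struct page **pages,
					  unsigned long num_pages)
{
	drm_clflush_pages(pages, num_pages);
}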
Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_cache.c | 51 +++++++++++++++++++++++++++------ drivers/gpu/drm/ttm/ttm_tt.c | 67 ++------------------------------------------ include/drm/drm_cache.h | 38 +++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 74 deletions(-) create mode 100644 include/drm/drm_cache.h (limited to 'include') diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index 0e994a0e46d4..3a5575e638db 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -45,25 +45,58 @@ drm_clflush_page(struct page *page) clflush(page_virtual + i); kunmap_atomic(page_virtual, KM_USER0); } -#endif +static void drm_cache_flush_clflush(struct page *pages[], + unsigned long num_pages) +{ + unsigned long i; + + mb(); + for (i = 0; i < num_pages; i++) + drm_clflush_page(*pages++); + mb(); +} + +static void +drm_clflush_ipi_handler(void *null) +{ + wbinvd(); +} +#elif !defined(__powerpc__) +static void drm_cache_ipi_handler(void *dummy) +{ +} +#endif void drm_clflush_pages(struct page *pages[], unsigned long num_pages) { #if defined(CONFIG_X86) if (cpu_has_clflush) { - unsigned long i; - - mb(); - for (i = 0; i < num_pages; ++i) - drm_clflush_page(*pages++); - mb(); - + drm_cache_flush_clflush(pages, num_pages); return; } - wbinvd(); + if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for cache flush.\n"); + +#elif defined(__powerpc__) + unsigned long i; + for (i = 0; i < num_pages; i++) { + struct page *page = pages[i]; + void *page_virtual; + + if (unlikely(page == NULL)) + continue; + + page_virtual = kmap_atomic(page, KM_USER0); + flush_dcache_range((unsigned long)page_virtual, + (unsigned long)page_virtual + PAGE_SIZE); + kunmap_atomic(page_virtual, KM_USER0); + } +#else + if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for drm cache flush\n"); #endif } EXPORT_SYMBOL(drm_clflush_pages); diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 42cca5519761..a55ee1a56c16 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -34,76 +34,13 @@ #include #include #include +#include "drm_cache.h" #include "ttm/ttm_module.h" #include "ttm/ttm_bo_driver.h" #include "ttm/ttm_placement.h" static int ttm_tt_swapin(struct ttm_tt *ttm); -#if defined(CONFIG_X86) -static void ttm_tt_clflush_page(struct page *page) -{ - uint8_t *page_virtual; - unsigned int i; - - if (unlikely(page == NULL)) - return; - - page_virtual = kmap_atomic(page, KM_USER0); - - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - clflush(page_virtual + i); - - kunmap_atomic(page_virtual, KM_USER0); -} - -static void ttm_tt_cache_flush_clflush(struct page *pages[], - unsigned long num_pages) -{ - unsigned long i; - - mb(); - for (i = 0; i < num_pages; ++i) - ttm_tt_clflush_page(*pages++); - mb(); -} -#elif !defined(__powerpc__) -static void ttm_tt_ipi_handler(void *null) -{ - ; -} -#endif - -void ttm_tt_cache_flush(struct page *pages[], unsigned long num_pages) -{ - -#if defined(CONFIG_X86) - if (cpu_has_clflush) { - ttm_tt_cache_flush_clflush(pages, num_pages); - return; - } -#elif defined(__powerpc__) - unsigned long i; - - for (i = 0; i < num_pages; ++i) { - struct page *page = pages[i]; - void *page_virtual; - - if (unlikely(page == NULL)) - continue; - - page_virtual = kmap_atomic(page, KM_USER0); - flush_dcache_range((unsigned long) page_virtual, - (unsigned long) page_virtual + PAGE_SIZE); - kunmap_atomic(page_virtual, 
KM_USER0); - } -#else - if (on_each_cpu(ttm_tt_ipi_handler, NULL, 1) != 0) - printk(KERN_ERR TTM_PFX - "Timed out waiting for drm cache flush.\n"); -#endif -} - /** * Allocates storage for pointers to the pages that back the ttm. * @@ -302,7 +239,7 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm, } if (ttm->caching_state == tt_cached) - ttm_tt_cache_flush(ttm->pages, ttm->num_pages); + drm_clflush_pages(ttm->pages, ttm->num_pages); for (i = 0; i < ttm->num_pages; ++i) { cur_page = ttm->pages[i]; diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h new file mode 100644 index 000000000000..7bfb063029d8 --- /dev/null +++ b/include/drm/drm_cache.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2009 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Authors: + * Dave Airlie + */ + +#ifndef _DRM_CACHE_H_ +#define _DRM_CACHE_H_ + +void drm_clflush_pages(struct page *pages[], unsigned long num_pages); + +#endif -- cgit v1.2.3 From a1a2d1d32250f6fcc317419e9dfb4a5a6946d2e6 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 23 Aug 2009 12:40:55 +0300 Subject: drm: GEM handles are u32, not int Several functions in the GEM kernel API used int as handle type, but user API has it __u32 which is also the intended type. Replace int with u32. Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 11 +++++------ drivers/gpu/drm/i915/i915_gem.c | 3 ++- include/drm/drmP.h | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index ffe8f4394d50..230c9ffdd5e9 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -164,7 +164,7 @@ EXPORT_SYMBOL(drm_gem_object_alloc); * Removes the mapping from handle to filp for this object. 
*/ static int -drm_gem_handle_delete(struct drm_file *filp, int handle) +drm_gem_handle_delete(struct drm_file *filp, u32 handle) { struct drm_device *dev; struct drm_gem_object *obj; @@ -207,7 +207,7 @@ drm_gem_handle_delete(struct drm_file *filp, int handle) int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, - int *handlep) + u32 *handlep) { int ret; @@ -221,7 +221,7 @@ again: /* do the allocation under our spinlock */ spin_lock(&file_priv->table_lock); - ret = idr_get_new_above(&file_priv->object_idr, obj, 1, handlep); + ret = idr_get_new_above(&file_priv->object_idr, obj, 1, (int *)handlep); spin_unlock(&file_priv->table_lock); if (ret == -EAGAIN) goto again; @@ -237,7 +237,7 @@ EXPORT_SYMBOL(drm_gem_handle_create); /** Returns a reference to the object named by the handle. */ struct drm_gem_object * drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, - int handle) + u32 handle) { struct drm_gem_object *obj; @@ -344,7 +344,7 @@ drm_gem_open_ioctl(struct drm_device *dev, void *data, struct drm_gem_open *args = data; struct drm_gem_object *obj; int ret; - int handle; + u32 handle; if (!(dev->driver->driver_features & DRIVER_GEM)) return -ENODEV; @@ -539,7 +539,6 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; vma->vm_ops = obj->dev->driver->gem_vm_ops; vma->vm_private_data = map->handle; - /* FIXME: use pgprot_writecombine when available */ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); /* Take a ref for this mapping of the object, so that the fault diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 140bee142fc2..0e6c9cca897c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -111,7 +111,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_create *args = data; struct drm_gem_object *obj; - int handle, ret; + int ret; + u32 handle; args->size = roundup(args->size, PAGE_SIZE); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index e0f1c1fee58b..eeefb6369e19 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1441,7 +1441,7 @@ drm_gem_object_unreference(struct drm_gem_object *obj) int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, - int *handlep); + u32 *handlep); static inline void drm_gem_object_handle_reference(struct drm_gem_object *obj) @@ -1467,7 +1467,7 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) struct drm_gem_object *drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, - int handle); + u32 handle); int drm_gem_close_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int drm_gem_flink_ioctl(struct drm_device *dev, void *data, -- cgit v1.2.3 From f8d80cdf40fe4d2393159012b38ce9f85a488686 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 2 Jun 2009 13:28:13 +0800 Subject: ACPICA: Remove duplicate extern declarations for public globals Some were defined twice, causes a warning with gcc -Wredundant-decls. 
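The idiom being restored is the usual single-definition pattern: declare a public global in exactly one place, and let exactly one translation unit turn the declarations into definitions. A sketch with made-up names (not the ACPICA ones):

/* hypothetical_globals.h */
#ifdef DEFINE_GLOBALS			/* set by exactly one .c file */
unsigned int example_trace_flags;	/* the one real definition */
#else
extern unsigned int example_trace_flags;	/* declaration elsewhere */
#endif

/* hypothetical_globals.c */
#define DEFINE_GLOBALS
#include "hypothetical_globals.h"

Declaring the same object a second time in another header is what -Wredundant-decls flags, and what this patch removes.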
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 33 ++++++++++++++++++--------------- include/acpi/acpixf.h | 1 + 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 3d87362d17ed..0b73b31c1b53 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -58,6 +58,10 @@ #define ACPI_INIT_GLOBAL(a,b) a #endif +#ifdef DEFINE_ACPI_GLOBALS + +/* Public globals, available from outside ACPICA subsystem */ + /***************************************************************************** * * Runtime configuration (static defaults that can be overriden at runtime) @@ -78,7 +82,7 @@ * 5) Allow unresolved references (invalid target name) in package objects * 6) Enable warning messages for behavior that is not ACPI spec compliant */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); /* * Automatically serialize ALL control methods? Default is FALSE, meaning @@ -86,27 +90,36 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); * Only change this if the ASL code is poorly written and cannot handle * reentrancy even though methods are marked "NotSerialized". */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE); /* * Create the predefined _OSI method in the namespace? Default is TRUE * because ACPI CA is fully compatible with other ACPI implementations. * Changing this will revert ACPI CA (and machine ASL) to pre-OSI behavior. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE); /* * Disable wakeup GPEs during runtime? Default is TRUE because WAKE and * RUNTIME GPEs should never be shared, and WAKE GPEs should typically only * be enabled just before going to sleep. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE); /* * Optionally use default values for the ACPI register widths. Set this to * TRUE to use the defaults, if an FADT contains incorrect widths/lengths. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); + +/* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. 
*/ + +struct acpi_table_fadt acpi_gbl_FADT; +u32 acpi_current_gpe_count; +u32 acpi_gbl_trace_flags; +acpi_name acpi_gbl_trace_method_name; + +#endif /***************************************************************************** * @@ -114,11 +127,6 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); * ****************************************************************************/ -/* Runtime configuration of debug print levels */ - -extern u32 acpi_dbg_level; -extern u32 acpi_dbg_layer; - /* Procedure nesting level for debug output */ extern u32 acpi_gbl_nesting_level; @@ -127,10 +135,8 @@ extern u32 acpi_gbl_nesting_level; ACPI_EXTERN u32 acpi_gbl_original_dbg_level; ACPI_EXTERN u32 acpi_gbl_original_dbg_layer; -ACPI_EXTERN acpi_name acpi_gbl_trace_method_name; ACPI_EXTERN u32 acpi_gbl_trace_dbg_level; ACPI_EXTERN u32 acpi_gbl_trace_dbg_layer; -ACPI_EXTERN u32 acpi_gbl_trace_flags; /***************************************************************************** * @@ -142,10 +148,8 @@ ACPI_EXTERN u32 acpi_gbl_trace_flags; * acpi_gbl_root_table_list is the master list of ACPI tables found in the * RSDT/XSDT. * - * acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list; -ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT; ACPI_EXTERN struct acpi_table_facs *acpi_gbl_FACS; /* These addresses are calculated from the FADT Event Block addresses */ @@ -340,7 +344,6 @@ ACPI_EXTERN struct acpi_fixed_event_handler ACPI_EXTERN struct acpi_gpe_xrupt_info *acpi_gbl_gpe_xrupt_list_head; ACPI_EXTERN struct acpi_gpe_block_info *acpi_gbl_gpe_fadt_blocks[ACPI_MAX_GPE_BLOCKS]; -ACPI_EXTERN u32 acpi_current_gpe_count; /***************************************************************************** * diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 82ec6a3c0500..2aecaa5cc06c 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -64,6 +64,7 @@ extern u8 acpi_gbl_enable_interpreter_slack; extern u8 acpi_gbl_all_methods_serialized; extern u8 acpi_gbl_create_osi_method; extern u8 acpi_gbl_leave_wake_gpes_disabled; +extern u8 acpi_gbl_use_default_register_widths; extern acpi_name acpi_gbl_trace_method_name; extern u32 acpi_gbl_trace_flags; -- cgit v1.2.3 From c6b5774caafa4c12b6019366e2fdaaff117e95a4 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 24 Jun 2009 09:44:06 +0800 Subject: ACPICA: Add 64-bit support to acpi_read and acpi_write Needed by drivers for new ACPi tables. Internal versions of these functions still use 32-bit max transfers, in order to minimize disruption and stack use for the standard ACPI registers (FADT-based). 
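A caller's-eye sketch of the widened public interface (the wrapper is hypothetical; acpi_read()'s u64 prototype is what this patch introduces):

#include <acpi/acpi.h>

/* With the widened prototype a 64-bit GAS register, e.g. from a new
 * ACPI table, is read in a single call; for bit_width == 64 the
 * implementation splits the access into two 32-bit transfers, as the
 * hwxface.c hunks below show. */
static acpi_status example_read_gas(struct acpi_generic_address *reg,
				    u64 *value)
{
	return acpi_read(value, reg);
}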
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/achware.h | 8 ++ drivers/acpi/acpica/evgpe.c | 8 +- drivers/acpi/acpica/evgpeblk.c | 4 +- drivers/acpi/acpica/hwgpe.c | 34 +++---- drivers/acpi/acpica/hwregs.c | 206 ++++++++++++++++++++++++++++++++++++++--- drivers/acpi/acpica/hwtimer.c | 2 +- drivers/acpi/acpica/hwxface.c | 166 +++++++++++++++++++-------------- include/acpi/acpixf.h | 4 +- 8 files changed, 327 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index 4afa3d8e0efb..36192f142fbb 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -62,6 +62,14 @@ u32 acpi_hw_get_mode(void); /* * hwregs - ACPI Register I/O */ +acpi_status +acpi_hw_validate_register(struct acpi_generic_address *reg, + u8 max_bit_width, u64 *address); + +acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg); + +acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg); + struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id); acpi_status acpi_hw_write_pm1_control(u32 pm1a_control, u32 pm1b_control); diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index b9d8ee69ca6c..afacf4416c73 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -424,8 +424,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) /* Read the Status Register */ status = - acpi_read(&status_reg, - &gpe_register_info->status_address); + acpi_hw_read(&status_reg, + &gpe_register_info->status_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } @@ -433,8 +433,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) /* Read the Enable Register */ status = - acpi_read(&enable_reg, - &gpe_register_info->enable_address); + acpi_hw_read(&enable_reg, + &gpe_register_info->enable_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c index 7b3463639422..a60aaa7635f3 100644 --- a/drivers/acpi/acpica/evgpeblk.c +++ b/drivers/acpi/acpica/evgpeblk.c @@ -843,14 +843,14 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block) /* Disable all GPEs within this register */ - status = acpi_write(0x00, &this_register->enable_address); + status = acpi_hw_write(0x00, &this_register->enable_address); if (ACPI_FAILURE(status)) { goto error_exit; } /* Clear any pending GPE events within this register */ - status = acpi_write(0xFF, &this_register->status_address); + status = acpi_hw_write(0xFF, &this_register->status_address); if (ACPI_FAILURE(status)) { goto error_exit; } diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c index d3b7e37c9eed..c28c41b3180b 100644 --- a/drivers/acpi/acpica/hwgpe.c +++ b/drivers/acpi/acpica/hwgpe.c @@ -82,7 +82,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) /* Get current value of the enable register that contains this GPE */ - status = acpi_read(&enable_mask, &gpe_register_info->enable_address); + status = acpi_hw_read(&enable_mask, &gpe_register_info->enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -95,7 +95,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) /* Write the updated enable mask */ - status = acpi_write(enable_mask, &gpe_register_info->enable_address); + status = acpi_hw_write(enable_mask, &gpe_register_info->enable_address); 
return (status); } @@ -130,8 +130,8 @@ acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info) /* Write the entire GPE (runtime) enable register */ - status = acpi_write(gpe_register_info->enable_for_run, - &gpe_register_info->enable_address); + status = acpi_hw_write(gpe_register_info->enable_for_run, + &gpe_register_info->enable_address); return (status); } @@ -163,8 +163,8 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info) * Write a one to the appropriate bit in the status register to * clear this GPE. */ - status = acpi_write(register_bit, - &gpe_event_info->register_info->status_address); + status = acpi_hw_write(register_bit, + &gpe_event_info->register_info->status_address); return (status); } @@ -222,7 +222,7 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info, /* GPE currently active (status bit == 1)? */ - status = acpi_read(&in_byte, &gpe_register_info->status_address); + status = acpi_hw_read(&in_byte, &gpe_register_info->status_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } @@ -266,8 +266,8 @@ acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Disable all GPEs in this register */ status = - acpi_write(0x00, - &gpe_block->register_info[i].enable_address); + acpi_hw_write(0x00, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -303,8 +303,8 @@ acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Clear status on all GPEs in this register */ status = - acpi_write(0xFF, - &gpe_block->register_info[i].status_address); + acpi_hw_write(0xFF, + &gpe_block->register_info[i].status_address); if (ACPI_FAILURE(status)) { return (status); } @@ -345,9 +345,9 @@ acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Enable all "runtime" GPEs in this register */ - status = acpi_write(gpe_block->register_info[i].enable_for_run, - &gpe_block->register_info[i]. - enable_address); + status = + acpi_hw_write(gpe_block->register_info[i].enable_for_run, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -387,9 +387,9 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Enable all "wake" GPEs in this register */ - status = acpi_write(gpe_block->register_info[i].enable_for_wake, - &gpe_block->register_info[i]. - enable_address); + status = + acpi_hw_write(gpe_block->register_info[i].enable_for_wake, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index 23d5505cb1f7..15c9ed2be853 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -62,6 +62,184 @@ acpi_hw_write_multiple(u32 value, struct acpi_generic_address *register_a, struct acpi_generic_address *register_b); +/****************************************************************************** + * + * FUNCTION: acpi_hw_validate_register + * + * PARAMETERS: Reg - GAS register structure + * max_bit_width - Max bit_width supported (32 or 64) + * Address - Pointer to where the gas->address + * is returned + * + * RETURN: Status + * + * DESCRIPTION: Validate the contents of a GAS register. Checks the GAS + * pointer, Address, space_id, bit_width, and bit_offset. 
+ * + ******************************************************************************/ + +acpi_status +acpi_hw_validate_register(struct acpi_generic_address *reg, + u8 max_bit_width, u64 *address) +{ + + /* Must have a valid pointer to a GAS structure */ + + if (!reg) { + return (AE_BAD_PARAMETER); + } + + /* + * Copy the target address. This handles possible alignment issues. + * Address must not be null. A null address also indicates an optional + * ACPI register that is not supported, so no error message. + */ + ACPI_MOVE_64_TO_64(address, ®->address); + if (!(*address)) { + return (AE_BAD_ADDRESS); + } + + /* Validate the space_iD */ + + if ((reg->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) && + (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { + ACPI_ERROR((AE_INFO, + "Unsupported address space: 0x%X", reg->space_id)); + return (AE_SUPPORT); + } + + /* Validate the bit_width */ + + if ((reg->bit_width != 8) && + (reg->bit_width != 16) && + (reg->bit_width != 32) && (reg->bit_width != max_bit_width)) { + ACPI_ERROR((AE_INFO, + "Unsupported register bit width: 0x%X", + reg->bit_width)); + return (AE_SUPPORT); + } + + /* Validate the bit_offset. Just a warning for now. */ + + if (reg->bit_offset != 0) { + ACPI_WARNING((AE_INFO, + "Unsupported register bit offset: 0x%X", + reg->bit_offset)); + } + + return (AE_OK); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_read + * + * PARAMETERS: Value - Where the value is returned + * Reg - GAS register structure + * + * RETURN: Status + * + * DESCRIPTION: Read from either memory or IO space. This is a 32-bit max + * version of acpi_read, used internally since the overhead of + * 64-bit values is not needed. + * + * LIMITATIONS: + * bit_width must be exactly 8, 16, or 32. + * space_iD must be system_memory or system_iO. + * bit_offset and access_width are currently ignored, as there has + * not been a need to implement these. + * + ******************************************************************************/ + +acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg) +{ + u64 address; + acpi_status status; + + ACPI_FUNCTION_NAME(hw_read); + + /* Validate contents of the GAS register */ + + status = acpi_hw_validate_register(reg, 32, &address); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* Initialize entire 32-bit return value to zero */ + + *value = 0; + + /* + * Two address spaces supported: Memory or IO. PCI_Config is + * not supported here because the GAS structure is insufficient + */ + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_read_memory((acpi_physical_address) + address, value, reg->bit_width); + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ + + status = acpi_hw_read_port((acpi_io_address) + address, value, reg->bit_width); + } + + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n", + *value, reg->bit_width, ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(reg->space_id))); + + return (status); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_write + * + * PARAMETERS: Value - Value to be written + * Reg - GAS register structure + * + * RETURN: Status + * + * DESCRIPTION: Write to either memory or IO space. This is a 32-bit max + * version of acpi_write, used internally since the overhead of + * 64-bit values is not needed. 
+ * + ******************************************************************************/ + +acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg) +{ + u64 address; + acpi_status status; + + ACPI_FUNCTION_NAME(hw_write); + + /* Validate contents of the GAS register */ + + status = acpi_hw_validate_register(reg, 32, &address); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* + * Two address spaces supported: Memory or IO. PCI_Config is + * not supported here because the GAS structure is insufficient + */ + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_write_memory((acpi_physical_address) + address, value, reg->bit_width); + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ + + status = acpi_hw_write_port((acpi_io_address) + address, value, reg->bit_width); + } + + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n", + value, reg->bit_width, ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(reg->space_id))); + + return (status); +} + /******************************************************************************* * * FUNCTION: acpi_hw_clear_acpi_status @@ -152,15 +330,16 @@ acpi_status acpi_hw_write_pm1_control(u32 pm1a_control, u32 pm1b_control) ACPI_FUNCTION_TRACE(hw_write_pm1_control); - status = acpi_write(pm1a_control, &acpi_gbl_FADT.xpm1a_control_block); + status = + acpi_hw_write(pm1a_control, &acpi_gbl_FADT.xpm1a_control_block); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } if (acpi_gbl_FADT.xpm1b_control_block.address) { status = - acpi_write(pm1b_control, - &acpi_gbl_FADT.xpm1b_control_block); + acpi_hw_write(pm1b_control, + &acpi_gbl_FADT.xpm1b_control_block); } return_ACPI_STATUS(status); } @@ -218,12 +397,13 @@ acpi_hw_register_read(u32 register_id, u32 * return_value) case ACPI_REGISTER_PM2_CONTROL: /* 8-bit access */ - status = acpi_read(&value, &acpi_gbl_FADT.xpm2_control_block); + status = + acpi_hw_read(&value, &acpi_gbl_FADT.xpm2_control_block); break; case ACPI_REGISTER_PM_TIMER: /* 32-bit access */ - status = acpi_read(&value, &acpi_gbl_FADT.xpm_timer_block); + status = acpi_hw_read(&value, &acpi_gbl_FADT.xpm_timer_block); break; case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */ @@ -340,7 +520,8 @@ acpi_status acpi_hw_register_write(u32 register_id, u32 value) * as per the ACPI spec. 
	 */
 		status =
-		    acpi_read(&read_value, &acpi_gbl_FADT.xpm2_control_block);
+		    acpi_hw_read(&read_value,
+				 &acpi_gbl_FADT.xpm2_control_block);
 		if (ACPI_FAILURE(status)) {
 			goto exit;
 		}
@@ -350,12 +531,13 @@ acpi_status acpi_hw_register_write(u32 register_id, u32 value)
 		ACPI_INSERT_BITS(value, ACPI_PM2_CONTROL_PRESERVED_BITS,
 				 read_value);
 
-		status = acpi_write(value, &acpi_gbl_FADT.xpm2_control_block);
+		status =
+		    acpi_hw_write(value, &acpi_gbl_FADT.xpm2_control_block);
 		break;
 
 	case ACPI_REGISTER_PM_TIMER:	/* 32-bit access */
 
-		status = acpi_write(value, &acpi_gbl_FADT.xpm_timer_block);
+		status = acpi_hw_write(value, &acpi_gbl_FADT.xpm_timer_block);
 		break;
 
 	case ACPI_REGISTER_SMI_COMMAND_BLOCK:	/* 8-bit access */
@@ -401,7 +583,7 @@ acpi_hw_read_multiple(u32 *value,
 
 	/* The first register is always required */
 
-	status = acpi_read(&value_a, register_a);
+	status = acpi_hw_read(&value_a, register_a);
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
@@ -409,7 +591,7 @@ acpi_hw_read_multiple(u32 *value,
 	/* Second register is optional */
 
 	if (register_b->address) {
-		status = acpi_read(&value_b, register_b);
+		status = acpi_hw_read(&value_b, register_b);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
@@ -452,7 +634,7 @@ acpi_hw_write_multiple(u32 value,
 
 	/* The first register is always required */
 
-	status = acpi_write(value, register_a);
+	status = acpi_hw_write(value, register_a);
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
@@ -470,7 +652,7 @@ acpi_hw_write_multiple(u32 value,
 	 * and writes have no side effects"
 	 */
 	if (register_b->address) {
-		status = acpi_write(value, register_b);
+		status = acpi_hw_write(value, register_b);
 	}
 
 	return (status);
diff --git a/drivers/acpi/acpica/hwtimer.c b/drivers/acpi/acpica/hwtimer.c
index b7f522c8f023..6b282e85d039 100644
--- a/drivers/acpi/acpica/hwtimer.c
+++ b/drivers/acpi/acpica/hwtimer.c
@@ -100,7 +100,7 @@ acpi_status acpi_get_timer(u32 * ticks)
 	}
 
 	status =
-	    acpi_hw_low_level_read(32, ticks, &acpi_gbl_FADT.xpm_timer_block);
+	    acpi_hw_read(ticks, &acpi_gbl_FADT.xpm_timer_block);
 
 	return_ACPI_STATUS(status);
 }
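ACPI allows the PM1 event and control register sets to be split into A and B
hardware blocks; acpi_hw_read_multiple() and acpi_hw_write_multiple() above
present such a pair as one logical register. A condensed sketch of the read
side follows; the final OR-combine does not appear in the hunk context shown,
but ORing the two halves is the spec-defined semantic for split PM1 registers:

	static acpi_status
	pm1_logical_read(u32 *value,
			 struct acpi_generic_address *register_a,
			 struct acpi_generic_address *register_b)
	{
		u32 value_a = 0;
		u32 value_b = 0;
		acpi_status status;

		/* The first (A) register is always required */

		status = acpi_hw_read(&value_a, register_a);
		if (ACPI_FAILURE(status)) {
			return (status);
		}

		/* The second (B) register is optional */

		if (register_b->address) {
			status = acpi_hw_read(&value_b, register_b);
			if (ACPI_FAILURE(status)) {
				return (status);
			}
		}

		*value = (value_a | value_b);
		return (AE_OK);
	}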
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index 9829979f2bdd..4ead85f29215 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -80,7 +80,7 @@ acpi_status acpi_reset(void)
 
 	/* Write the reset value to the reset register */
 
-	status = acpi_write(acpi_gbl_FADT.reset_value, reset_reg);
+	status = acpi_hw_write(acpi_gbl_FADT.reset_value, reset_reg);
 	return_ACPI_STATUS(status);
 }
 
@@ -97,67 +97,92 @@ ACPI_EXPORT_SYMBOL(acpi_reset)
 *
 * DESCRIPTION: Read from either memory or IO space.
 *
+ * LIMITATIONS:
+ *      bit_width must be exactly 8, 16, 32, or 64.
+ *      space_id must be system_memory or system_io.
+ *      bit_offset and access_width are currently ignored, as there has
+ *      not been a need to implement these.
+ *
 ******************************************************************************/
-acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg)
+acpi_status acpi_read(u64 *return_value, struct acpi_generic_address *reg)
 {
+	u32 value;
 	u32 width;
 	u64 address;
 	acpi_status status;
 
 	ACPI_FUNCTION_NAME(acpi_read);
 
-	/*
-	 * Must have a valid pointer to a GAS structure, and a non-zero address
-	 * within.
-	 */
-	if (!reg) {
+	if (!return_value) {
 		return (AE_BAD_PARAMETER);
 	}
 
-	/* Get a local copy of the address. Handles possible alignment issues */
+	/* Validate contents of the GAS register. Allow 64-bit transfers */
 
-	ACPI_MOVE_64_TO_64(&address, &reg->address);
-	if (!address) {
-		return (AE_BAD_ADDRESS);
+	status = acpi_hw_validate_register(reg, 64, &address);
+	if (ACPI_FAILURE(status)) {
+		return (status);
 	}
 
-	/* Supported widths are 8/16/32 */
-
 	width = reg->bit_width;
-	if ((width != 8) && (width != 16) && (width != 32)) {
-		return (AE_SUPPORT);
+	if (width == 64) {
+		width = 32;	/* Break into two 32-bit transfers */
 	}
 
-	/* Initialize entire 32-bit return value to zero */
+	/* Initialize entire 64-bit return value to zero */
 
-	*value = 0;
+	*return_value = 0;
+	value = 0;
 
 	/*
 	 * Two address spaces supported: Memory or IO. PCI_Config is
 	 * not supported here because the GAS structure is insufficient
 	 */
-	switch (reg->space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_read_memory((acpi_physical_address)
+					     address, &value, width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
+		*return_value = value;
+
+		if (reg->bit_width == 64) {
 
-		status = acpi_os_read_memory((acpi_physical_address) address,
-					     value, width);
-		break;
+			/* Read the top 32 bits */
 
-	case ACPI_ADR_SPACE_SYSTEM_IO:
+			status = acpi_os_read_memory((acpi_physical_address)
+						     (address + 4), &value, 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+			*return_value |= ((u64)value << 32);
+		}
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
 
-		status =
-		    acpi_hw_read_port((acpi_io_address) address, value, width);
-		break;
+		status = acpi_hw_read_port((acpi_io_address)
+					   address, &value, width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
+		*return_value = value;
 
-	default:
-		ACPI_ERROR((AE_INFO,
-			    "Unsupported address space: %X", reg->space_id));
-		return (AE_BAD_PARAMETER);
+		if (reg->bit_width == 64) {
+
+			/* Read the top 32 bits */
+
+			status = acpi_hw_read_port((acpi_io_address)
+						   (address + 4), &value, 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+			*return_value |= ((u64)value << 32);
+		}
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
-			  "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n",
-			  *value, width, ACPI_FORMAT_UINT64(address),
+			  "Read: %8.8X%8.8X width %2d from %8.8X%8.8X (%s)\n",
+			  ACPI_FORMAT_UINT64(*return_value), reg->bit_width,
+			  ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
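A caller-side sketch of the widened interface; the register definition and
MMIO address here are hypothetical:

	static acpi_status read_counter(u64 *count)
	{
		struct acpi_generic_address reg = {
			.space_id  = ACPI_ADR_SPACE_SYSTEM_MEMORY,
			.bit_width = 64,		/* hypothetical 64-bit register */
			.address   = 0xfed00000,	/* hypothetical MMIO address */
		};

		/* acpi_hw_validate_register() runs first: a null address or an
		   unsupported space_id/bit_width fails before any access */
		return acpi_read(count, &reg);
	}

With bit_width == 64 the access is split into two 32-bit transfers, so the
underlying acpi_os_read_memory()/acpi_hw_read_port() primitives never see a
64-bit width.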
@@ -169,7 +194,7 @@ ACPI_EXPORT_SYMBOL(acpi_read)
 *
 * FUNCTION:    acpi_write
 *
- * PARAMETERS:  Value               - To be written
+ * PARAMETERS:  Value               - Value to be written
 *              Reg                 - GAS register structure
 *
 * RETURN:      Status
@@ -177,7 +202,7 @@ ACPI_EXPORT_SYMBOL(acpi_read)
 * DESCRIPTION: Write to either memory or IO space.
 *
 ******************************************************************************/
-acpi_status acpi_write(u32 value, struct acpi_generic_address *reg)
+acpi_status acpi_write(u64 value, struct acpi_generic_address *reg)
 {
 	u32 width;
 	u64 address;
@@ -185,54 +210,61 @@ acpi_status acpi_write(u32 value, struct acpi_generic_address *reg)
 
 	ACPI_FUNCTION_NAME(acpi_write);
 
-	/*
-	 * Must have a valid pointer to a GAS structure, and a non-zero address
-	 * within.
-	 */
-	if (!reg) {
-		return (AE_BAD_PARAMETER);
-	}
+	/* Validate contents of the GAS register. Allow 64-bit transfers */
 
-	/* Get a local copy of the address. Handles possible alignment issues */
-
-	ACPI_MOVE_64_TO_64(&address, &reg->address);
-	if (!address) {
-		return (AE_BAD_ADDRESS);
+	status = acpi_hw_validate_register(reg, 64, &address);
+	if (ACPI_FAILURE(status)) {
+		return (status);
 	}
 
-	/* Supported widths are 8/16/32 */
-
 	width = reg->bit_width;
-	if ((width != 8) && (width != 16) && (width != 32)) {
-		return (AE_SUPPORT);
+	if (width == 64) {
+		width = 32;	/* Break into two 32-bit transfers */
 	}
 
 	/*
-	 * Two address spaces supported: Memory or IO.
-	 * PCI_Config is not supported here because the GAS struct is insufficient
+	 * Two address spaces supported: Memory or IO. PCI_Config is
+	 * not supported here because the GAS structure is insufficient
 	 */
-	switch (reg->space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
-
-		status = acpi_os_write_memory((acpi_physical_address) address,
-					      value, width);
-		break;
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_write_memory((acpi_physical_address)
+					      address, ACPI_LODWORD(value),
+					      width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
 
-	case ACPI_ADR_SPACE_SYSTEM_IO:
+		if (reg->bit_width == 64) {
+			status = acpi_os_write_memory((acpi_physical_address)
+						      (address + 4),
+						      ACPI_HIDWORD(value), 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+		}
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
 
-		status = acpi_hw_write_port((acpi_io_address) address, value,
+		status = acpi_hw_write_port((acpi_io_address)
+					    address, ACPI_LODWORD(value),
 					    width);
-		break;
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
 
-	default:
-		ACPI_ERROR((AE_INFO,
-			    "Unsupported address space: %X", reg->space_id));
-		return (AE_BAD_PARAMETER);
+		if (reg->bit_width == 64) {
+			status = acpi_hw_write_port((acpi_io_address)
+						    (address + 4),
+						    ACPI_HIDWORD(value), 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+		}
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
-			  "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n",
-			  value, width, ACPI_FORMAT_UINT64(address),
+			  "Wrote: %8.8X%8.8X width %2d to %8.8X%8.8X (%s)\n",
+			  ACPI_FORMAT_UINT64(value), reg->bit_width,
+			  ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 2aecaa5cc06c..b450a195319a 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -360,9 +360,9 @@ acpi_status acpi_set_firmware_waking_vector(u32 physical_address);
 acpi_status acpi_set_firmware_waking_vector64(u64 physical_address);
 #endif
 
-acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg);
+acpi_status acpi_read(u64 *value, struct acpi_generic_address *reg);
 
-acpi_status acpi_write(u32 value, struct acpi_generic_address *reg);
+acpi_status acpi_write(u64 value, struct acpi_generic_address *reg);
 
 acpi_status
 acpi_get_sleep_type_data(u8 sleep_state, u8 * slp_typ_a, u8 * slp_typ_b);
-- cgit v1.2.3 

From 15b8dd53f5ffaf8e2d9095c423f713423f576c0f Mon Sep 17 00:00:00 2001
From: Bob Moore
Date: Mon, 29 Jun 2009 13:39:29 +0800
Subject: ACPICA: Major update for acpi_get_object_info external interface

Completed a major update for the acpi_get_object_info external interface.
Changes include:
- Support for variable, unlimited length HID, UID, and CID strings
- Support Processor objects the same as Devices (HID,UID,CID,ADR,STA, etc.)
- Call the _SxW power methods on behalf of a device object
- Determine if a device is a PCI root bridge
- Change the ACPI_BUFFER parameter to ACPI_DEVICE_INFO.
These changes will require an update to all callers of this interface.
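A minimal sketch of the new calling convention, mirroring the caller
conversions further below (the handle and the printk are illustrative):

	struct acpi_device_info *info;
	acpi_status status;

	status = acpi_get_object_info(handle, &info);
	if (ACPI_FAILURE(status))
		return status;

	if (info->valid & ACPI_VALID_HID)
		printk(KERN_DEBUG "HID: %s\n", info->hardware_id.string);

	kfree(info);	/* one allocation holds the struct and all ID strings */

The returned buffer embeds the CID array and every ID string, so a single
kfree() (ACPI_FREE() inside ACPICA) releases everything.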
See the ACPICA Programmer Reference for details. Also, update all invocations of acpi_get_object_info interface Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- arch/ia64/hp/common/sba_iommu.c | 7 +- drivers/acpi/acpi_memhotplug.c | 11 +- drivers/acpi/acpica/Makefile | 2 +- drivers/acpi/acpica/acconfig.h | 5 + drivers/acpi/acpica/acglobal.h | 3 +- drivers/acpi/acpica/acinterp.h | 4 +- drivers/acpi/acpica/acutils.h | 24 ++- drivers/acpi/acpica/evrgnini.c | 45 +---- drivers/acpi/acpica/exutils.c | 53 +++-- drivers/acpi/acpica/nsdumpdv.c | 7 +- drivers/acpi/acpica/nsxfeval.c | 23 ++- drivers/acpi/acpica/nsxfname.c | 237 +++++++++++++++++------ drivers/acpi/acpica/uteval.c | 375 ++++-------------------------------- drivers/acpi/acpica/utglobal.c | 10 +- drivers/acpi/acpica/utids.c | 382 +++++++++++++++++++++++++++++++++++++ drivers/acpi/acpica/utmisc.c | 28 +++ drivers/acpi/container.c | 11 +- drivers/acpi/dock.c | 8 +- drivers/acpi/glue.c | 6 +- drivers/acpi/scan.c | 153 +++++++++------ drivers/char/agp/hp-agp.c | 9 +- drivers/ide/ide-acpi.c | 5 +- drivers/pci/hotplug/acpiphp_ibm.c | 12 +- drivers/platform/x86/sony-laptop.c | 7 +- drivers/pnp/pnpacpi/core.c | 6 +- include/acpi/acpi_bus.h | 8 +- include/acpi/acpixf.h | 3 +- include/acpi/actypes.h | 87 +++++---- 28 files changed, 901 insertions(+), 630 deletions(-) create mode 100644 drivers/acpi/acpica/utids.c (limited to 'include') diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 8cfb001092ab..674a8374c6d9 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2026,24 +2026,21 @@ acpi_sba_ioc_add(struct acpi_device *device) struct ioc *ioc; acpi_status status; u64 hpa, length; - struct acpi_buffer buffer; struct acpi_device_info *dev_info; status = hp_acpi_csr_space(device->handle, &hpa, &length); if (ACPI_FAILURE(status)) return 1; - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(device->handle, &buffer); + status = acpi_get_object_info(device->handle, &dev_info); if (ACPI_FAILURE(status)) return 1; - dev_info = buffer.pointer; /* * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI * root bridges, and its CSR space includes the IOC function. 
*/ - if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) { + if (strncmp("HWP0001", dev_info->hardware_id.string, 7) == 0) { hpa += ZX1_IOC_OFFSET; /* zx1 based systems default to kernel page size iommu pages */ if (!iovp_shift) diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 9a62224cc278..80eacbe157e2 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -481,26 +481,23 @@ static acpi_status is_memory_device(acpi_handle handle) { char *hardware_id; acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_device_info *info; - - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) return status; - info = buffer.pointer; if (!(info->valid & ACPI_VALID_HID)) { - kfree(buffer.pointer); + kfree(info); return AE_ERROR; } - hardware_id = info->hardware_id.value; + hardware_id = info->hardware_id.string; if ((hardware_id == NULL) || (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID))) status = AE_ERROR; - kfree(buffer.pointer); + kfree(info); return status; } diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index 72ac28da14e3..0e7d56185f6d 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -44,4 +44,4 @@ acpi-y += tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o acpi-y += utalloc.o utdebug.o uteval.o utinit.o utmisc.o utxface.o \ utcopy.o utdelete.o utglobal.o utmath.o utobject.o \ - utstate.o utmutex.o utobject.o utresrc.o utlock.o + utstate.o utmutex.o utobject.o utresrc.o utlock.o utids.o diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index e6777fb883d2..6c1fb2d9f4d5 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -203,6 +203,11 @@ #define ACPI_SMBUS_BUFFER_SIZE 34 +/* _sx_d and _sx_w control methods */ + +#define ACPI_NUM_sx_d_METHODS 4 +#define ACPI_NUM_sx_w_METHODS 5 + /****************************************************************************** * * ACPI AML Debugger diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 0b73b31c1b53..6389f7c1de59 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -265,7 +265,8 @@ ACPI_EXTERN u8 acpi_gbl_osi_data; extern u8 acpi_gbl_shutdown; extern u32 acpi_gbl_startup_flags; extern const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT]; -extern const char *acpi_gbl_highest_dstate_names[4]; +extern const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS]; +extern const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS]; extern const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES]; extern const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS]; diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h index e8db7a3143a5..5db9f2916f7c 100644 --- a/drivers/acpi/acpica/acinterp.h +++ b/drivers/acpi/acpica/acinterp.h @@ -461,9 +461,9 @@ void acpi_ex_acquire_global_lock(u32 rule); void acpi_ex_release_global_lock(u32 rule); -void acpi_ex_eisa_id_to_string(u32 numeric_id, char *out_string); +void acpi_ex_eisa_id_to_string(char *dest, acpi_integer compressed_id); -void acpi_ex_unsigned_integer_to_string(acpi_integer value, char *out_string); +void acpi_ex_integer_to_string(char *dest, acpi_integer value); /* * exregion - default op_region handlers diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index 
897810ba0ccc..b0add85de308 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -324,26 +324,30 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, acpi_status acpi_ut_evaluate_numeric_object(char *object_name, struct acpi_namespace_node *device_node, - acpi_integer * address); + acpi_integer *value); acpi_status -acpi_ut_execute_HID(struct acpi_namespace_node *device_node, - struct acpica_device_id *hid); +acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 *status_flags); acpi_status -acpi_ut_execute_CID(struct acpi_namespace_node *device_node, - struct acpi_compatible_id_list **return_cid_list); +acpi_ut_execute_power_methods(struct acpi_namespace_node *device_node, + const char **method_names, + u8 method_count, u8 *out_values); +/* + * utids - device ID support + */ acpi_status -acpi_ut_execute_STA(struct acpi_namespace_node *device_node, - u32 * status_flags); +acpi_ut_execute_HID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id); acpi_status acpi_ut_execute_UID(struct acpi_namespace_node *device_node, - struct acpica_device_id *uid); + struct acpica_device_id **return_id); acpi_status -acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest); +acpi_ut_execute_CID(struct acpi_namespace_node *device_node, + struct acpica_device_id_list **return_cid_list); /* * utlock - reader/writer locks @@ -445,6 +449,8 @@ acpi_ut_short_divide(acpi_integer in_dividend, */ const char *acpi_ut_validate_exception(acpi_status status); +u8 acpi_ut_is_pci_root_bridge(char *id); + u8 acpi_ut_is_aml_table(struct acpi_table_header *table); acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id); diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 284a7becbe96..cf29c4953028 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -50,8 +50,6 @@ ACPI_MODULE_NAME("evrgnini") /* Local prototypes */ -static u8 acpi_ev_match_pci_root_bridge(char *id); - static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); /******************************************************************************* @@ -330,37 +328,6 @@ acpi_ev_pci_config_region_setup(acpi_handle handle, return_ACPI_STATUS(AE_OK); } -/******************************************************************************* - * - * FUNCTION: acpi_ev_match_pci_root_bridge - * - * PARAMETERS: Id - The HID/CID in string format - * - * RETURN: TRUE if the Id is a match for a PCI/PCI-Express Root Bridge - * - * DESCRIPTION: Determine if the input ID is a PCI Root Bridge ID. - * - ******************************************************************************/ - -static u8 acpi_ev_match_pci_root_bridge(char *id) -{ - - /* - * Check if this is a PCI root. - * ACPI 3.0+: check for a PCI Express root also. 
- */ - if (!(ACPI_STRNCMP(id, - PCI_ROOT_HID_STRING, - sizeof(PCI_ROOT_HID_STRING))) || - !(ACPI_STRNCMP(id, - PCI_EXPRESS_ROOT_HID_STRING, - sizeof(PCI_EXPRESS_ROOT_HID_STRING)))) { - return (TRUE); - } - - return (FALSE); -} - /******************************************************************************* * * FUNCTION: acpi_ev_is_pci_root_bridge @@ -377,9 +344,10 @@ static u8 acpi_ev_match_pci_root_bridge(char *id) static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) { acpi_status status; - struct acpica_device_id hid; - struct acpi_compatible_id_list *cid; + struct acpica_device_id *hid; + struct acpica_device_id_list *cid; u32 i; + u8 match; /* Get the _HID and check for a PCI Root Bridge */ @@ -388,7 +356,10 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) return (FALSE); } - if (acpi_ev_match_pci_root_bridge(hid.value)) { + match = acpi_ut_is_pci_root_bridge(hid->string); + ACPI_FREE(hid); + + if (match) { return (TRUE); } @@ -402,7 +373,7 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) /* Check all _CIDs in the returned list */ for (i = 0; i < cid->count; i++) { - if (acpi_ev_match_pci_root_bridge(cid->id[i].value)) { + if (acpi_ut_is_pci_root_bridge(cid->ids[i].string)) { ACPI_FREE(cid); return (TRUE); } diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 87730e944132..7d41f99f7052 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -358,50 +358,67 @@ static u32 acpi_ex_digits_needed(acpi_integer value, u32 base) * * FUNCTION: acpi_ex_eisa_id_to_string * - * PARAMETERS: numeric_id - EISA ID to be converted + * PARAMETERS: compressed_id - EISAID to be converted * out_string - Where to put the converted string (8 bytes) * * RETURN: None * - * DESCRIPTION: Convert a numeric EISA ID to string representation + * DESCRIPTION: Convert a numeric EISAID to string representation. Return + * buffer must be large enough to hold the string. The string + * returned is always exactly of length ACPI_EISAID_STRING_SIZE + * (includes null terminator). The EISAID is always 32 bits. * ******************************************************************************/ -void acpi_ex_eisa_id_to_string(u32 numeric_id, char *out_string) +void acpi_ex_eisa_id_to_string(char *out_string, acpi_integer compressed_id) { - u32 eisa_id; + u32 swapped_id; ACPI_FUNCTION_ENTRY(); + /* The EISAID should be a 32-bit integer */ + + if (compressed_id > ACPI_UINT32_MAX) { + ACPI_WARNING((AE_INFO, + "Expected EISAID is larger than 32 bits: 0x%8.8X%8.8X, truncating", + ACPI_FORMAT_UINT64(compressed_id))); + } + /* Swap ID to big-endian to get contiguous bits */ - eisa_id = acpi_ut_dword_byte_swap(numeric_id); + swapped_id = acpi_ut_dword_byte_swap((u32)compressed_id); - out_string[0] = (char)('@' + (((unsigned long)eisa_id >> 26) & 0x1f)); - out_string[1] = (char)('@' + ((eisa_id >> 21) & 0x1f)); - out_string[2] = (char)('@' + ((eisa_id >> 16) & 0x1f)); - out_string[3] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 12); - out_string[4] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 8); - out_string[5] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 4); - out_string[6] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 0); + /* First 3 bytes are uppercase letters. 
Next 4 bytes are hexadecimal */ + + out_string[0] = + (char)(0x40 + (((unsigned long)swapped_id >> 26) & 0x1F)); + out_string[1] = (char)(0x40 + ((swapped_id >> 21) & 0x1F)); + out_string[2] = (char)(0x40 + ((swapped_id >> 16) & 0x1F)); + out_string[3] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 12); + out_string[4] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 8); + out_string[5] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 4); + out_string[6] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 0); out_string[7] = 0; } /******************************************************************************* * - * FUNCTION: acpi_ex_unsigned_integer_to_string + * FUNCTION: acpi_ex_integer_to_string * - * PARAMETERS: Value - Value to be converted - * out_string - Where to put the converted string (8 bytes) + * PARAMETERS: out_string - Where to put the converted string. At least + * 21 bytes are needed to hold the largest + * possible 64-bit integer. + * Value - Value to be converted * * RETURN: None, string * - * DESCRIPTION: Convert a number to string representation. Assumes string - * buffer is large enough to hold the string. + * DESCRIPTION: Convert a 64-bit integer to decimal string representation. + * Assumes string buffer is large enough to hold the string. The + * largest string is (ACPI_MAX64_DECIMAL_DIGITS + 1). * ******************************************************************************/ -void acpi_ex_unsigned_integer_to_string(acpi_integer value, char *out_string) +void acpi_ex_integer_to_string(char *out_string, acpi_integer value) { u32 count; u32 digits_needed; diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c index 41994fe7fbb8..0fe87f1aef16 100644 --- a/drivers/acpi/acpica/nsdumpdv.c +++ b/drivers/acpi/acpica/nsdumpdv.c @@ -70,7 +70,6 @@ static acpi_status acpi_ns_dump_one_device(acpi_handle obj_handle, u32 level, void *context, void **return_value) { - struct acpi_buffer buffer; struct acpi_device_info *info; acpi_status status; u32 i; @@ -80,17 +79,15 @@ acpi_ns_dump_one_device(acpi_handle obj_handle, status = acpi_ns_dump_one_object(obj_handle, level, context, return_value); - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(obj_handle, &buffer); + status = acpi_get_object_info(obj_handle, &info); if (ACPI_SUCCESS(status)) { - info = buffer.pointer; for (i = 0; i < level; i++) { ACPI_DEBUG_PRINT_RAW((ACPI_DB_TABLES, " ")); } ACPI_DEBUG_PRINT_RAW((ACPI_DB_TABLES, " HID: %s, ADR: %8.8X%8.8X, Status: %X\n", - info->hardware_id.value, + info->hardware_id.string, ACPI_FORMAT_UINT64(info->address), info->current_status)); ACPI_FREE(info); diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index daf4ad37896d..4929dbdbc8f0 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -535,10 +535,11 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, acpi_status status; struct acpi_namespace_node *node; u32 flags; - struct acpica_device_id hid; - struct acpi_compatible_id_list *cid; + struct acpica_device_id *hid; + struct acpica_device_id_list *cid; u32 i; - int found; + u8 found; + int no_match; status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { @@ -582,10 +583,14 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, return (AE_CTRL_DEPTH); } - if (ACPI_STRNCMP(hid.value, info->hid, sizeof(hid.value)) != 0) { - - /* Get the list of Compatible IDs */ + no_match = ACPI_STRCMP(hid->string, info->hid); + ACPI_FREE(hid); + if 
(no_match) { + /* + * HID does not match, attempt match within the + * list of Compatible IDs (CIDs) + */ status = acpi_ut_execute_CID(node, &cid); if (status == AE_NOT_FOUND) { return (AE_OK); @@ -597,10 +602,8 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, found = 0; for (i = 0; i < cid->count; i++) { - if (ACPI_STRNCMP(cid->id[i].value, info->hid, - sizeof(struct - acpi_compatible_id)) == - 0) { + if (ACPI_STRCMP(cid->ids[i].string, info->hid) + == 0) { found = 1; break; } diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index f23593d6add4..ddc84af6336e 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -51,6 +51,11 @@ #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsxfname") +/* Local prototypes */ +static char *acpi_ns_copy_device_id(struct acpica_device_id *dest, + struct acpica_device_id *source, + char *string_area); + /****************************************************************************** * * FUNCTION: acpi_get_handle @@ -68,6 +73,7 @@ ACPI_MODULE_NAME("nsxfname") * namespace handle. * ******************************************************************************/ + acpi_status acpi_get_handle(acpi_handle parent, acpi_string pathname, acpi_handle * ret_handle) @@ -208,12 +214,40 @@ acpi_get_name(acpi_handle handle, u32 name_type, struct acpi_buffer * buffer) ACPI_EXPORT_SYMBOL(acpi_get_name) +/****************************************************************************** + * + * FUNCTION: acpi_ns_copy_device_id + * + * PARAMETERS: Dest - Pointer to the destination DEVICE_ID + * Source - Pointer to the source DEVICE_ID + * string_area - Pointer to where to copy the dest string + * + * RETURN: Pointer to the next string area + * + * DESCRIPTION: Copy a single DEVICE_ID, including the string data. + * + ******************************************************************************/ +static char *acpi_ns_copy_device_id(struct acpica_device_id *dest, + struct acpica_device_id *source, + char *string_area) +{ + /* Create the destination DEVICE_ID */ + + dest->string = string_area; + dest->length = source->length; + + /* Copy actual string and return a pointer to the next string area */ + + ACPI_MEMCPY(string_area, source->string, source->length); + return (string_area + source->length); +} + /****************************************************************************** * * FUNCTION: acpi_get_object_info * - * PARAMETERS: Handle - Object Handle - * Buffer - Where the info is returned + * PARAMETERS: Handle - Object Handle + * return_buffer - Where the info is returned * * RETURN: Status * @@ -221,33 +255,37 @@ ACPI_EXPORT_SYMBOL(acpi_get_name) * namespace node and possibly by running several standard * control methods (Such as in the case of a device.) * + * For Device and Processor objects, run the Device _HID, _UID, _CID, _STA, + * _ADR, _sx_w, and _sx_d methods. + * + * Note: Allocates the return buffer, must be freed by the caller. 
+ * ******************************************************************************/ + acpi_status -acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer) +acpi_get_object_info(acpi_handle handle, + struct acpi_device_info **return_buffer) { - acpi_status status; struct acpi_namespace_node *node; struct acpi_device_info *info; - struct acpi_device_info *return_info; - struct acpi_compatible_id_list *cid_list = NULL; - acpi_size size; + struct acpica_device_id_list *cid_list = NULL; + struct acpica_device_id *hid = NULL; + struct acpica_device_id *uid = NULL; + char *next_id_string; + acpi_object_type type; + acpi_name name; + u8 param_count = 0; + u8 valid = 0; + u32 info_size; + u32 i; + acpi_status status; /* Parameter validation */ - if (!handle || !buffer) { + if (!handle || !return_buffer) { return (AE_BAD_PARAMETER); } - status = acpi_ut_validate_buffer(buffer); - if (ACPI_FAILURE(status)) { - return (status); - } - - info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_device_info)); - if (!info) { - return (AE_NO_MEMORY); - } - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { goto cleanup; @@ -256,66 +294,91 @@ acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer) node = acpi_ns_map_handle_to_node(handle); if (!node) { (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); - status = AE_BAD_PARAMETER; - goto cleanup; + return (AE_BAD_PARAMETER); } - /* Init return structure */ - - size = sizeof(struct acpi_device_info); + /* Get the namespace node data while the namespace is locked */ - info->type = node->type; - info->name = node->name.integer; - info->valid = 0; + info_size = sizeof(struct acpi_device_info); + type = node->type; + name = node->name.integer; if (node->type == ACPI_TYPE_METHOD) { - info->param_count = node->object->method.param_count; + param_count = node->object->method.param_count; } status = acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { - goto cleanup; + return (status); } - /* If not a device, we are all done */ - - if (info->type == ACPI_TYPE_DEVICE) { + if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) { /* - * Get extra info for ACPI Devices objects only: - * Run the Device _HID, _UID, _CID, _STA, _ADR and _sx_d methods. + * Get extra info for ACPI Device/Processor objects only: + * Run the Device _HID, _UID, and _CID methods. * * Note: none of these methods are required, so they may or may - * not be present for this device. The Info->Valid bitfield is used - * to indicate which methods were found and ran successfully. + * not be present for this device. The Info->Valid bitfield is used + * to indicate which methods were found and run successfully. 
		 */
 
 		/* Execute the Device._HID method */
 
-		status = acpi_ut_execute_HID(node, &info->hardware_id);
+		status = acpi_ut_execute_HID(node, &hid);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_HID;
+			info_size += hid->length;
+			valid |= ACPI_VALID_HID;
 		}
 
 		/* Execute the Device._UID method */
 
-		status = acpi_ut_execute_UID(node, &info->unique_id);
+		status = acpi_ut_execute_UID(node, &uid);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_UID;
+			info_size += uid->length;
+			valid |= ACPI_VALID_UID;
 		}
 
 		/* Execute the Device._CID method */
 
 		status = acpi_ut_execute_CID(node, &cid_list);
 		if (ACPI_SUCCESS(status)) {
-			size += cid_list->size;
-			info->valid |= ACPI_VALID_CID;
+
+			/* Add size of CID strings and CID pointer array */
+
+			info_size +=
+			    (cid_list->list_size -
+			     sizeof(struct acpica_device_id_list));
+			valid |= ACPI_VALID_CID;
 		}
+	}
+
+	/*
+	 * Now that we have the variable-length data, we can allocate the
+	 * return buffer
+	 */
+	info = ACPI_ALLOCATE_ZEROED(info_size);
+	if (!info) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	/* Get the fixed-length data */
+
+	if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) {
+		/*
+		 * Get extra info for ACPI Device/Processor objects only:
+		 * Run the _STA, _ADR, _sx_w, and _sx_d methods.
+		 *
+		 * Note: none of these methods are required, so they may or may
+		 * not be present for this device. The Info->Valid bitfield is used
+		 * to indicate which methods were found and run successfully.
+		 */
 
 		/* Execute the Device._STA method */
 
 		status = acpi_ut_execute_STA(node, &info->current_status);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_STA;
+			valid |= ACPI_VALID_STA;
 		}
 
 		/* Execute the Device._ADR method */
@@ -323,36 +386,100 @@
 		status = acpi_ut_evaluate_numeric_object(METHOD_NAME__ADR,
 							 node, &info->address);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_ADR;
+			valid |= ACPI_VALID_ADR;
+		}
+
+		/* Execute the Device._sx_w methods */
+
+		status = acpi_ut_execute_power_methods(node,
+						       acpi_gbl_lowest_dstate_names,
+						       ACPI_NUM_sx_w_METHODS,
+						       info->lowest_dstates);
+		if (ACPI_SUCCESS(status)) {
+			valid |= ACPI_VALID_SXWS;
 		}
 
 		/* Execute the Device._sx_d methods */
 
-		status = acpi_ut_execute_sxds(node, info->highest_dstates);
+		status = acpi_ut_execute_power_methods(node,
+						       acpi_gbl_highest_dstate_names,
+						       ACPI_NUM_sx_d_METHODS,
+						       info->highest_dstates);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_SXDS;
+			valid |= ACPI_VALID_SXDS;
 		}
 	}
 
-	/* Validate/Allocate/Clear caller buffer */
+	/*
+	 * Create a pointer to the string area of the return buffer.
+	 * Point to the end of the base struct acpi_device_info structure.
+	 */
+	next_id_string = ACPI_CAST_PTR(char, info->compatible_id_list.ids);
+	if (cid_list) {
 
-	status = acpi_ut_initialize_buffer(buffer, size);
-	if (ACPI_FAILURE(status)) {
-		goto cleanup;
+		/* Point past the CID DEVICE_ID array */
+
+		next_id_string +=
+		    ((acpi_size) cid_list->count *
+		     sizeof(struct acpica_device_id));
 	}
 
-	/* Populate the return buffer */
+	/*
+	 * Copy the HID, UID, and CIDs to the return buffer. The variable-length
+	 * strings are copied to the reserved area at the end of the buffer.
+	 *
+	 * For HID and CID, check if the ID is a PCI Root Bridge.
+	 */
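+	/*
+	 * Buffer layout at this point (single allocation of info_size):
+	 *
+	 *    struct acpi_device_info     - fixed-length header
+	 *    compatible_id_list.ids[]    - cid_list->count DEVICE_ID entries
+	 *    string area                 - HID, UID, and CID strings
+	 *
+	 * next_id_string points to the string area; each copy below appends
+	 * one null-terminated ID string and advances the pointer.
+	 */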
+	if (hid) {
+		next_id_string = acpi_ns_copy_device_id(&info->hardware_id,
+							hid, next_id_string);
+
+		if (acpi_ut_is_pci_root_bridge(hid->string)) {
+			info->flags |= ACPI_PCI_ROOT_BRIDGE;
+		}
+	}
 
-	return_info = buffer->pointer;
-	ACPI_MEMCPY(return_info, info, sizeof(struct acpi_device_info));
+	if (uid) {
+		next_id_string = acpi_ns_copy_device_id(&info->unique_id,
+							uid, next_id_string);
+	}
 
 	if (cid_list) {
-		ACPI_MEMCPY(&return_info->compatibility_id, cid_list,
-			    cid_list->size);
+		info->compatible_id_list.count = cid_list->count;
+		info->compatible_id_list.list_size = cid_list->list_size;
+
+		/* Copy each CID */
+
+		for (i = 0; i < cid_list->count; i++) {
+			next_id_string =
+			    acpi_ns_copy_device_id(&info->compatible_id_list.
+						   ids[i], &cid_list->ids[i],
+						   next_id_string);
+
+			if (acpi_ut_is_pci_root_bridge(cid_list->ids[i].string)) {
+				info->flags |= ACPI_PCI_ROOT_BRIDGE;
+			}
+		}
 	}
 
+	/* Copy the fixed-length data */
+
+	info->info_size = info_size;
+	info->type = type;
+	info->name = name;
+	info->param_count = param_count;
+	info->valid = valid;
+
+	*return_buffer = info;
+	status = AE_OK;
+
 cleanup:
-	ACPI_FREE(info);
+	if (hid) {
+		ACPI_FREE(hid);
+	}
+	if (uid) {
+		ACPI_FREE(uid);
+	}
 	if (cid_list) {
 		ACPI_FREE(cid_list);
 	}
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index 006b16c26017..5503307b8bb7 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -44,19 +44,10 @@
 #include <acpi/acpi.h>
 #include "accommon.h"
 #include "acnamesp.h"
-#include "acinterp.h"
 
 #define _COMPONENT          ACPI_UTILITIES
 ACPI_MODULE_NAME("uteval")
 
-/* Local prototypes */
-static void
-acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length);
-
-static acpi_status
-acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc,
-			  struct acpi_compatible_id *one_cid);
-
 /*
 * Strings supported by the _OSI predefined (internal) method.
 *
@@ -213,7 +204,7 @@ acpi_status acpi_osi_invalidate(char *interface)
 * RETURN:      Status
 *
 * DESCRIPTION: Evaluates a namespace object and verifies the type of the
- *              return object. Common code that simplifies accessing objects
+ *              return object.  Common code that simplifies accessing objects
 *              that have required return objects of fixed types.
 *
 * NOTE: Internal function, no parameter validation
@@ -298,7 +289,7 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
 	if ((acpi_gbl_enable_interpreter_slack) && (!expected_return_btypes)) {
 		/*
-		 * We received a return object, but one was not expected. This can
+		 * We received a return object, but one was not expected.  This can
 		 * happen frequently if the "implicit return" feature is enabled.
 		 * Just delete the return object and return AE_OK.
 		 */
@@ -340,12 +331,12 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
 *
 * PARAMETERS:  object_name         - Object name to be evaluated
 *              device_node         - Node for the device
- *              Address             - Where the value is returned
+ *              Value               - Where the value is returned
 *
 * RETURN:      Status
 *
 * DESCRIPTION: Evaluates a numeric namespace object for a selected device
- *              and stores result in *Address.
+ *              and stores result in *Value.
* * NOTE: Internal function, no parameter validation * @@ -354,7 +345,7 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, acpi_status acpi_ut_evaluate_numeric_object(char *object_name, struct acpi_namespace_node *device_node, - acpi_integer * address) + acpi_integer *value) { union acpi_operand_object *obj_desc; acpi_status status; @@ -369,295 +360,7 @@ acpi_ut_evaluate_numeric_object(char *object_name, /* Get the returned Integer */ - *address = obj_desc->integer.value; - - /* On exit, we must delete the return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_copy_id_string - * - * PARAMETERS: Destination - Where to copy the string - * Source - Source string - * max_length - Length of the destination buffer - * - * RETURN: None - * - * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods. - * Performs removal of a leading asterisk if present -- workaround - * for a known issue on a bunch of machines. - * - ******************************************************************************/ - -static void -acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length) -{ - - /* - * Workaround for ID strings that have a leading asterisk. This construct - * is not allowed by the ACPI specification (ID strings must be - * alphanumeric), but enough existing machines have this embedded in their - * ID strings that the following code is useful. - */ - if (*source == '*') { - source++; - } - - /* Do the actual copy */ - - ACPI_STRNCPY(destination, source, max_length); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_HID - * - * PARAMETERS: device_node - Node for the device - * Hid - Where the HID is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _HID control method that returns the hardware - * ID of the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_HID(struct acpi_namespace_node *device_node, - struct acpica_device_id *hid) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - - ACPI_FUNCTION_TRACE(ut_execute_HID); - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, - &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - if (obj_desc->common.type == ACPI_TYPE_INTEGER) { - - /* Convert the Numeric HID to string */ - - acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value, - hid->value); - } else { - /* Copy the String HID from the returned object */ - - acpi_ut_copy_id_string(hid->value, obj_desc->string.pointer, - sizeof(hid->value)); - } - - /* On exit, we must delete the return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_translate_one_cid - * - * PARAMETERS: obj_desc - _CID object, must be integer or string - * one_cid - Where the CID string is returned - * - * RETURN: Status - * - * DESCRIPTION: Return a numeric or string _CID value as a string. - * (Compatible ID) - * - * NOTE: Assumes a maximum _CID string length of - * ACPI_MAX_CID_LENGTH. 
- * - ******************************************************************************/ - -static acpi_status -acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc, - struct acpi_compatible_id *one_cid) -{ - - switch (obj_desc->common.type) { - case ACPI_TYPE_INTEGER: - - /* Convert the Numeric CID to string */ - - acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value, - one_cid->value); - return (AE_OK); - - case ACPI_TYPE_STRING: - - if (obj_desc->string.length > ACPI_MAX_CID_LENGTH) { - return (AE_AML_STRING_LIMIT); - } - - /* Copy the String CID from the returned object */ - - acpi_ut_copy_id_string(one_cid->value, obj_desc->string.pointer, - ACPI_MAX_CID_LENGTH); - return (AE_OK); - - default: - - return (AE_TYPE); - } -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_CID - * - * PARAMETERS: device_node - Node for the device - * return_cid_list - Where the CID list is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _CID control method that returns one or more - * compatible hardware IDs for the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_CID(struct acpi_namespace_node * device_node, - struct acpi_compatible_id_list ** return_cid_list) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - u32 count; - u32 size; - struct acpi_compatible_id_list *cid_list; - u32 i; - - ACPI_FUNCTION_TRACE(ut_execute_CID); - - /* Evaluate the _CID method for this device */ - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING - | ACPI_BTYPE_PACKAGE, &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Get the number of _CIDs returned */ - - count = 1; - if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { - count = obj_desc->package.count; - } - - /* Allocate a worst-case buffer for the _CIDs */ - - size = (((count - 1) * sizeof(struct acpi_compatible_id)) + - sizeof(struct acpi_compatible_id_list)); - - cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size); - if (!cid_list) { - return_ACPI_STATUS(AE_NO_MEMORY); - } - - /* Init CID list */ - - cid_list->count = count; - cid_list->size = size; - - /* - * A _CID can return either a single compatible ID or a package of - * compatible IDs. Each compatible ID can be one of the following: - * 1) Integer (32 bit compressed EISA ID) or - * 2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss") - */ - - /* The _CID object can be either a single CID or a package (list) of CIDs */ - - if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { - - /* Translate each package element */ - - for (i = 0; i < count; i++) { - status = - acpi_ut_translate_one_cid(obj_desc->package. 
- elements[i], - &cid_list->id[i]); - if (ACPI_FAILURE(status)) { - break; - } - } - } else { - /* Only one CID, translate to a string */ - - status = acpi_ut_translate_one_cid(obj_desc, cid_list->id); - } - - /* Cleanup on error */ - - if (ACPI_FAILURE(status)) { - ACPI_FREE(cid_list); - } else { - *return_cid_list = cid_list; - } - - /* On exit, we must delete the _CID return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_UID - * - * PARAMETERS: device_node - Node for the device - * Uid - Where the UID is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _UID control method that returns the hardware - * ID of the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_UID(struct acpi_namespace_node *device_node, - struct acpica_device_id *uid) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - - ACPI_FUNCTION_TRACE(ut_execute_UID); - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, - &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - if (obj_desc->common.type == ACPI_TYPE_INTEGER) { - - /* Convert the Numeric UID to string */ - - acpi_ex_unsigned_integer_to_string(obj_desc->integer.value, - uid->value); - } else { - /* Copy the String UID from the returned object */ - - acpi_ut_copy_id_string(uid->value, obj_desc->string.pointer, - sizeof(uid->value)); - } + *value = obj_desc->integer.value; /* On exit, we must delete the return object */ @@ -716,60 +419,64 @@ acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 * flags) /******************************************************************************* * - * FUNCTION: acpi_ut_execute_Sxds + * FUNCTION: acpi_ut_execute_power_methods * * PARAMETERS: device_node - Node for the device - * Flags - Where the status flags are returned + * method_names - Array of power method names + * method_count - Number of methods to execute + * out_values - Where the power method values are returned * - * RETURN: Status + * RETURN: Status, out_values * - * DESCRIPTION: Executes _STA for selected device and stores results in - * *Flags. + * DESCRIPTION: Executes the specified power methods for the device and returns + * the result(s). * * NOTE: Internal function, no parameter validation * - ******************************************************************************/ +******************************************************************************/ acpi_status -acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest) +acpi_ut_execute_power_methods(struct acpi_namespace_node *device_node, + const char **method_names, + u8 method_count, u8 *out_values) { union acpi_operand_object *obj_desc; acpi_status status; + acpi_status final_status = AE_NOT_FOUND; u32 i; - ACPI_FUNCTION_TRACE(ut_execute_sxds); + ACPI_FUNCTION_TRACE(ut_execute_power_methods); - for (i = 0; i < 4; i++) { - highest[i] = 0xFF; + for (i = 0; i < method_count; i++) { + /* + * Execute the power method (_sx_d or _sx_w). The only allowable + * return type is an Integer. 
+ */ status = acpi_ut_evaluate_object(device_node, ACPI_CAST_PTR(char, - acpi_gbl_highest_dstate_names - [i]), + method_names[i]), ACPI_BTYPE_INTEGER, &obj_desc); - if (ACPI_FAILURE(status)) { - if (status != AE_NOT_FOUND) { - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "%s on Device %4.4s, %s\n", - ACPI_CAST_PTR(char, - acpi_gbl_highest_dstate_names - [i]), - acpi_ut_get_node_name - (device_node), - acpi_format_exception - (status))); - - return_ACPI_STATUS(status); - } - } else { - /* Extract the Dstate value */ - - highest[i] = (u8) obj_desc->integer.value; + if (ACPI_SUCCESS(status)) { + out_values[i] = (u8)obj_desc->integer.value; /* Delete the return object */ acpi_ut_remove_reference(obj_desc); + final_status = AE_OK; /* At least one value is valid */ + continue; } + + out_values[i] = ACPI_UINT8_MAX; + if (status == AE_NOT_FOUND) { + continue; /* Ignore if not found */ + } + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Failed %s on Device %4.4s, %s\n", + ACPI_CAST_PTR(char, method_names[i]), + acpi_ut_get_node_name(device_node), + acpi_format_exception(status))); } - return_ACPI_STATUS(AE_OK); + return_ACPI_STATUS(final_status); } diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c index 59e46f257c02..ed7a33c67fbe 100644 --- a/drivers/acpi/acpica/utglobal.c +++ b/drivers/acpi/acpica/utglobal.c @@ -90,7 +90,15 @@ const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT] = { "\\_S5_" }; -const char *acpi_gbl_highest_dstate_names[4] = { +const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS] = { + "_S0W", + "_S1W", + "_S2W", + "_S3W", + "_S4W" +}; + +const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS] = { "_S1D", "_S2D", "_S3D", diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c new file mode 100644 index 000000000000..52eaae404554 --- /dev/null +++ b/drivers/acpi/acpica/utids.c @@ -0,0 +1,382 @@ +/****************************************************************************** + * + * Module Name: utids - support for device IDs - HID, UID, CID + * + *****************************************************************************/ + +/* + * Copyright (C) 2000 - 2009, Intel Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acinterp.h"
+
+#define _COMPONENT          ACPI_UTILITIES
+ACPI_MODULE_NAME("utids")
+
+/* Local prototypes */
+static void acpi_ut_copy_id_string(char *destination, char *source);
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_copy_id_string
+ *
+ * PARAMETERS:  Destination         - Where to copy the string
+ *              Source              - Source string
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods.
+ *              Performs removal of a leading asterisk if present -- workaround
+ *              for a known issue on a bunch of machines.
+ *
+ ******************************************************************************/
+
+static void acpi_ut_copy_id_string(char *destination, char *source)
+{
+
+	/*
+	 * Workaround for ID strings that have a leading asterisk. This construct
+	 * is not allowed by the ACPI specification (ID strings must be
+	 * alphanumeric), but enough existing machines have this embedded in their
+	 * ID strings that the following code is useful.
+	 */
+	if (*source == '*') {
+		source++;
+	}
+
+	/* Do the actual copy */
+
+	ACPI_STRCPY(destination, source);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_execute_HID
+ *
+ * PARAMETERS:  device_node         - Node for the device
+ *              return_id           - Where the string HID is returned
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Executes the _HID control method that returns the hardware
+ *              ID of the device. The HID is either a 32-bit encoded EISAID
+ *              Integer or a String. A string is always returned. An EISAID
+ *              is converted to a string.
+ * + * NOTE: Internal function, no parameter validation + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_HID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + struct acpica_device_id *hid; + u32 length; + acpi_status status; + + ACPI_FUNCTION_TRACE(ut_execute_HID); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, + &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + length = ACPI_EISAID_STRING_SIZE; + } else { + length = obj_desc->string.length + 1; + } + + /* Allocate a buffer for the HID */ + + hid = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpica_device_id) + + (acpi_size) length); + if (!hid) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after DEVICE_ID struct */ + + hid->string = ACPI_ADD_PTR(char, hid, sizeof(struct acpica_device_id)); + + /* Convert EISAID to a string or simply copy existing string */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + acpi_ex_eisa_id_to_string(hid->string, obj_desc->integer.value); + } else { + acpi_ut_copy_id_string(hid->string, obj_desc->string.pointer); + } + + hid->length = length; + *return_id = hid; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_UID + * + * PARAMETERS: device_node - Node for the device + * return_id - Where the string UID is returned + * + * RETURN: Status + * + * DESCRIPTION: Executes the _UID control method that returns the unique + * ID of the device. The UID is either a 64-bit Integer (NOT an + * EISAID) or a string. Always returns a string. A 64-bit integer + * is converted to a decimal string. 
+ * + * NOTE: Internal function, no parameter validation + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_UID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + struct acpica_device_id *uid; + u32 length; + acpi_status status; + + ACPI_FUNCTION_TRACE(ut_execute_UID); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, + &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + length = ACPI_MAX64_DECIMAL_DIGITS + 1; + } else { + length = obj_desc->string.length + 1; + } + + /* Allocate a buffer for the UID */ + + uid = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpica_device_id) + + (acpi_size) length); + if (!uid) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after DEVICE_ID struct */ + + uid->string = ACPI_ADD_PTR(char, uid, sizeof(struct acpica_device_id)); + + /* Convert an Integer to string, or just copy an existing string */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + acpi_ex_integer_to_string(uid->string, obj_desc->integer.value); + } else { + acpi_ut_copy_id_string(uid->string, obj_desc->string.pointer); + } + + uid->length = length; + *return_id = uid; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_CID + * + * PARAMETERS: device_node - Node for the device + * return_cid_list - Where the CID list is returned + * + * RETURN: Status, list of CID strings + * + * DESCRIPTION: Executes the _CID control method that returns one or more + * compatible hardware IDs for the device. + * + * NOTE: Internal function, no parameter validation + * + * A _CID method can return either a single compatible ID or a package of + * compatible IDs. Each compatible ID can be one of the following: + * 1) Integer (32 bit compressed EISA ID) or + * 2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss") + * + * The Integer CIDs are converted to string format by this function. + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_CID(struct acpi_namespace_node *device_node, + struct acpica_device_id_list **return_cid_list) +{ + union acpi_operand_object **cid_objects; + union acpi_operand_object *obj_desc; + struct acpica_device_id_list *cid_list; + char *next_id_string; + u32 string_area_size; + u32 length; + u32 cid_list_size; + acpi_status status; + u32 count; + u32 i; + + ACPI_FUNCTION_TRACE(ut_execute_CID); + + /* Evaluate the _CID method for this device */ + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING + | ACPI_BTYPE_PACKAGE, &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* + * Get the count and size of the returned _CIDs. _CID can return either + * a Package of Integers/Strings or a single Integer or String. + * Note: This section also validates that all CID elements are of the + * correct type (Integer or String). 
+ */ + if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { + count = obj_desc->package.count; + cid_objects = obj_desc->package.elements; + } else { /* Single Integer or String CID */ + + count = 1; + cid_objects = &obj_desc; + } + + string_area_size = 0; + for (i = 0; i < count; i++) { + + /* String lengths include null terminator */ + + switch (cid_objects[i]->common.type) { + case ACPI_TYPE_INTEGER: + string_area_size += ACPI_EISAID_STRING_SIZE; + break; + + case ACPI_TYPE_STRING: + string_area_size += cid_objects[i]->string.length + 1; + break; + + default: + status = AE_TYPE; + goto cleanup; + } + } + + /* + * Now that we know the length of the CIDs, allocate return buffer: + * 1) Size of the base structure + + * 2) Size of the CID DEVICE_ID array + + * 3) Size of the actual CID strings + */ + cid_list_size = sizeof(struct acpica_device_id_list) + + ((count - 1) * sizeof(struct acpica_device_id)) + string_area_size; + + cid_list = ACPI_ALLOCATE_ZEROED(cid_list_size); + if (!cid_list) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for CID strings starts after the CID DEVICE_ID array */ + + next_id_string = ACPI_CAST_PTR(char, cid_list->ids) + + ((acpi_size) count * sizeof(struct acpica_device_id)); + + /* Copy/convert the CIDs to the return buffer */ + + for (i = 0; i < count; i++) { + if (cid_objects[i]->common.type == ACPI_TYPE_INTEGER) { + + /* Convert the Integer (EISAID) CID to a string */ + + acpi_ex_eisa_id_to_string(next_id_string, + cid_objects[i]->integer. + value); + length = ACPI_EISAID_STRING_SIZE; + } else { /* ACPI_TYPE_STRING */ + + /* Copy the String CID from the returned object */ + + acpi_ut_copy_id_string(next_id_string, + cid_objects[i]->string.pointer); + length = cid_objects[i]->string.length + 1; + } + + cid_list->ids[i].string = next_id_string; + cid_list->ids[i].length = length; + next_id_string += length; + } + + /* Finish the CID list */ + + cid_list->count = count; + cid_list->list_size = cid_list_size; + *return_cid_list = cid_list; + +cleanup: + + /* On exit, we must delete the _CID return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index fbe782348b0b..9cd65334ca75 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -118,6 +118,34 @@ const char *acpi_ut_validate_exception(acpi_status status) return (ACPI_CAST_PTR(const char, exception)); } +/******************************************************************************* + * + * FUNCTION: acpi_ut_is_pci_root_bridge + * + * PARAMETERS: Id - The HID/CID in string format + * + * RETURN: TRUE if the Id is a match for a PCI/PCI-Express Root Bridge + * + * DESCRIPTION: Determine if the input ID is a PCI Root Bridge ID. + * + ******************************************************************************/ + +u8 acpi_ut_is_pci_root_bridge(char *id) +{ + + /* + * Check if this is a PCI root bridge. + * ACPI 3.0+: check for a PCI Express root also. 
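    /*
     * [ Illustration, assumed consumer: acpi_get_object_info() presumably
     *   applies this predicate to the _HID/_CID strings to set
     *   ACPI_PCI_ROOT_BRIDGE (defined later in this patch) in
     *   acpi_device_info.flags, roughly:
     *
     *      if ((info->valid & ACPI_VALID_HID) &&
     *          acpi_ut_is_pci_root_bridge(info->hardware_id.string)) {
     *              info->flags |= ACPI_PCI_ROOT_BRIDGE;
     *      }
     * ]
     */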
+ */ + if (!(ACPI_STRCMP(id, + PCI_ROOT_HID_STRING)) || + !(ACPI_STRCMP(id, PCI_EXPRESS_ROOT_HID_STRING))) { + return (TRUE); + } + + return (FALSE); +} + /******************************************************************************* * * FUNCTION: acpi_ut_is_aml_table diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index fe0cdf83641a..2aee8c24dc56 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -200,20 +200,17 @@ container_walk_namespace_cb(acpi_handle handle, u32 lvl, void *context, void **rv) { char *hid = NULL; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_device_info *info; acpi_status status; int *action = context; - - status = acpi_get_object_info(handle, &buffer); - if (ACPI_FAILURE(status) || !buffer.pointer) { + status = acpi_get_object_info(handle, &info); + if (ACPI_FAILURE(status)) { return AE_OK; } - info = buffer.pointer; if (info->valid & ACPI_VALID_HID) - hid = info->hardware_id.value; + hid = info->hardware_id.string; if (hid == NULL) { goto end; @@ -240,7 +237,7 @@ container_walk_namespace_cb(acpi_handle handle, } end: - kfree(buffer.pointer); + kfree(info); return AE_OK; } diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index efb959d6c8a9..39536b80bce7 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -231,18 +231,16 @@ static int is_ata(acpi_handle handle) static int is_battery(acpi_handle handle) { struct acpi_device_info *info; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; int ret = 1; - if (!ACPI_SUCCESS(acpi_get_object_info(handle, &buffer))) + if (!ACPI_SUCCESS(acpi_get_object_info(handle, &info))) return 0; - info = buffer.pointer; if (!(info->valid & ACPI_VALID_HID)) ret = 0; else - ret = !strcmp("PNP0C0A", info->hardware_id.value); + ret = !strcmp("PNP0C0A", info->hardware_id.string); - kfree(buffer.pointer); + kfree(info); return ret; } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index a8a5c29958c8..27a7072347ea 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -93,15 +93,13 @@ do_acpi_find_child(acpi_handle handle, u32 lvl, void *context, void **rv) { acpi_status status; struct acpi_device_info *info; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_find_child *find = context; - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_SUCCESS(status)) { - info = buffer.pointer; if (info->address == find->address) find->handle = handle; - kfree(buffer.pointer); + kfree(info); } return AE_OK; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 781435d7e369..0ab526de7c55 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -60,13 +60,13 @@ static int create_modalias(struct acpi_device *acpi_dev, char *modalias, } if (acpi_dev->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list; + struct acpica_device_id_list *cid_list; int i; cid_list = acpi_dev->pnp.cid_list; for (i = 0; i < cid_list->count; i++) { count = snprintf(&modalias[len], size, "%s:", - cid_list->id[i].value); + cid_list->ids[i].string); if (count < 0 || count >= size) { printk(KERN_ERR PREFIX "%s cid[%i] exceeds event buffer size", acpi_dev->pnp.device_name, i); @@ -287,14 +287,14 @@ int acpi_match_device_ids(struct acpi_device *device, } if (device->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list = device->pnp.cid_list; + struct acpica_device_id_list *cid_list = device->pnp.cid_list; int i; for (id = ids; id->id[0]; id++) { /* compare multiple _CID 
entries against driver ids */ for (i = 0; i < cid_list->count; i++) { if (!strcmp((char*)id->id, - cid_list->id[i].value)) + cid_list->ids[i].string)) return 0; } } @@ -999,33 +999,89 @@ static int acpi_dock_match(struct acpi_device *device) return acpi_get_handle(device->handle, "_DCK", &tmp); } +static struct acpica_device_id_list* +acpi_add_cid( + struct acpi_device_info *info, + struct acpica_device_id *new_cid) +{ + struct acpica_device_id_list *cid; + char *next_id_string; + acpi_size cid_length; + acpi_size new_cid_length; + u32 i; + + + /* Allocate new CID list with room for the new CID */ + + if (!new_cid) + new_cid_length = info->compatible_id_list.list_size; + else if (info->compatible_id_list.list_size) + new_cid_length = info->compatible_id_list.list_size + + new_cid->length + sizeof(struct acpica_device_id); + else + new_cid_length = sizeof(struct acpica_device_id_list) + new_cid->length; + + cid = ACPI_ALLOCATE_ZEROED(new_cid_length); + if (!cid) { + return NULL; + } + + cid->list_size = new_cid_length; + cid->count = info->compatible_id_list.count; + if (new_cid) + cid->count++; + next_id_string = (char *) cid->ids + (cid->count * sizeof(struct acpica_device_id)); + + /* Copy all existing CIDs */ + + for (i = 0; i < info->compatible_id_list.count; i++) { + cid_length = info->compatible_id_list.ids[i].length; + cid->ids[i].string = next_id_string; + cid->ids[i].length = cid_length; + + ACPI_MEMCPY(next_id_string, info->compatible_id_list.ids[i].string, + cid_length); + + next_id_string += cid_length; + } + + /* Append the new CID */ + + if (new_cid) { + cid->ids[i].string = next_id_string; + cid->ids[i].length = new_cid->length; + + ACPI_MEMCPY(next_id_string, new_cid->string, new_cid->length); + } + + return cid; +} + static void acpi_device_set_id(struct acpi_device *device, struct acpi_device *parent, acpi_handle handle, int type) { - struct acpi_device_info *info; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_device_info *info = NULL; char *hid = NULL; char *uid = NULL; - struct acpi_compatible_id_list *cid_list = NULL; - const char *cid_add = NULL; + struct acpica_device_id_list *cid_list = NULL; + char *cid_add = NULL; acpi_status status; switch (type) { case ACPI_BUS_TYPE_DEVICE: - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "%s: Error reading device info\n", __func__); return; } - info = buffer.pointer; if (info->valid & ACPI_VALID_HID) - hid = info->hardware_id.value; + hid = info->hardware_id.string; if (info->valid & ACPI_VALID_UID) - uid = info->unique_id.value; + uid = info->unique_id.string; if (info->valid & ACPI_VALID_CID) - cid_list = &info->compatibility_id; + cid_list = &info->compatible_id_list; if (info->valid & ACPI_VALID_ADR) { device->pnp.bus_address = info->address; device->flags.bus_address = 1; @@ -1076,55 +1132,44 @@ static void acpi_device_set_id(struct acpi_device *device, } if (hid) { - strcpy(device->pnp.hardware_id, hid); - device->flags.hardware_id = 1; - } + device->pnp.hardware_id = ACPI_ALLOCATE_ZEROED(strlen (hid) + 1); + if (device->pnp.hardware_id) { + strcpy(device->pnp.hardware_id, hid); + device->flags.hardware_id = 1; + } + } else + device->pnp.hardware_id = NULL; + if (uid) { - strcpy(device->pnp.unique_id, uid); - device->flags.unique_id = 1; - } + device->pnp.unique_id = ACPI_ALLOCATE_ZEROED(strlen (uid) + 1); + if (device->pnp.unique_id) { + strcpy(device->pnp.unique_id, uid); + 
device->flags.unique_id = 1; + } + } else + device->pnp.unique_id = NULL; + if (cid_list || cid_add) { - struct acpi_compatible_id_list *list; - int size = 0; - int count = 0; - - if (cid_list) { - size = cid_list->size; - } else if (cid_add) { - size = sizeof(struct acpi_compatible_id_list); - cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size); - if (!cid_list) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(buffer.pointer); - return; - } else { - cid_list->count = 0; - cid_list->size = size; - } + struct acpica_device_id_list *list; + + if (cid_add) { + struct acpica_device_id cid; + cid.length = strlen (cid_add) + 1; + cid.string = cid_add; + + list = acpi_add_cid(info, &cid); + } else { + list = acpi_add_cid(info, NULL); } - if (cid_add) - size += sizeof(struct acpi_compatible_id); - list = kmalloc(size, GFP_KERNEL); if (list) { - if (cid_list) { - memcpy(list, cid_list, cid_list->size); - count = cid_list->count; - } - if (cid_add) { - strncpy(list->id[count].value, cid_add, - ACPI_MAX_CID_LENGTH); - count++; - device->flags.compatible_ids = 1; - } - list->size = size; - list->count = count; device->pnp.cid_list = list; - } else - printk(KERN_ERR PREFIX "Memory allocation error\n"); + if (cid_add) + device->flags.compatible_ids = 1; + } } - kfree(buffer.pointer); + kfree(info); } static int acpi_device_set_context(struct acpi_device *device, int type) diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c index 8f3d4c184914..7bead4c816ca 100644 --- a/drivers/char/agp/hp-agp.c +++ b/drivers/char/agp/hp-agp.c @@ -478,7 +478,6 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret) { acpi_handle handle, parent; acpi_status status; - struct acpi_buffer buffer; struct acpi_device_info *info; u64 lba_hpa, sba_hpa, length; int match; @@ -490,13 +489,11 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret) /* Look for an enclosing IOC scope and find its CSR space */ handle = obj; do { - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_SUCCESS(status)) { /* TBD check _CID also */ - info = buffer.pointer; - info->hardware_id.value[sizeof(info->hardware_id)-1] = '\0'; - match = (strcmp(info->hardware_id.value, "HWP0001") == 0); + info->hardware_id.string[sizeof(info->hardware_id.length)-1] = '\0'; + match = (strcmp(info->hardware_id.string, "HWP0001") == 0); kfree(info); if (match) { status = hp_acpi_csr_space(handle, &sba_hpa, &length); diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index c509c9916464..c0cf45a11b93 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -114,8 +114,6 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle, unsigned int bus, devnum, func; acpi_integer addr; acpi_handle dev_handle; - struct acpi_buffer buffer = {.length = ACPI_ALLOCATE_BUFFER, - .pointer = NULL}; acpi_status status; struct acpi_device_info *dinfo = NULL; int ret = -ENODEV; @@ -134,12 +132,11 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle, goto err; } - status = acpi_get_object_info(dev_handle, &buffer); + status = acpi_get_object_info(dev_handle, &dinfo); if (ACPI_FAILURE(status)) { DEBPRINT("get_object_info for device failed\n"); goto err; } - dinfo = buffer.pointer; if (dinfo && (dinfo->valid & ACPI_VALID_ADR) && dinfo->address == addr) { *pcidevfn = addr; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 
5befa7e379b7..a9d926b7d805 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -398,23 +398,21 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle, acpi_handle *phandle = (acpi_handle *)context; acpi_status status; struct acpi_device_info *info; - struct acpi_buffer info_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; int retval = 0; - status = acpi_get_object_info(handle, &info_buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) { err("%s: Failed to get device information status=0x%x\n", __func__, status); return retval; } - info = info_buffer.pointer; - info->hardware_id.value[sizeof(info->hardware_id.value) - 1] = '\0'; + info->hardware_id.string[sizeof(info->hardware_id.length) - 1] = '\0'; if (info->current_status && (info->valid & ACPI_VALID_HID) && - (!strcmp(info->hardware_id.value, IBM_HARDWARE_ID1) || - !strcmp(info->hardware_id.value, IBM_HARDWARE_ID2))) { + (!strcmp(info->hardware_id.string, IBM_HARDWARE_ID1) || + !strcmp(info->hardware_id.string, IBM_HARDWARE_ID2))) { dbg("found hardware: %s, handle: %p\n", - info->hardware_id.value, handle); + info->hardware_id.string, handle); *phandle = handle; /* returning non-zero causes the search to stop * and returns this value to the caller of diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index dafaa4a92df5..f9f68e0e7344 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -976,15 +976,12 @@ static acpi_status sony_walk_callback(acpi_handle handle, u32 level, void *context, void **return_value) { struct acpi_device_info *info; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - - if (ACPI_SUCCESS(acpi_get_object_info(handle, &buffer))) { - info = buffer.pointer; + if (ACPI_SUCCESS(acpi_get_object_info(handle, &info))) { printk(KERN_WARNING DRV_PFX "method: name: %4.4s, args %X\n", (char *)&info->name, info->param_count); - kfree(buffer.pointer); + kfree(info); } return AE_OK; diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 9496494f340e..c07fdb94d665 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -194,13 +194,13 @@ static int __init pnpacpi_add_device(struct acpi_device *device) pnpacpi_parse_resource_option_data(dev); if (device->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list = device->pnp.cid_list; + struct acpica_device_id_list *cid_list = device->pnp.cid_list; int i; for (i = 0; i < cid_list->count; i++) { - if (!ispnpidacpi(cid_list->id[i].value)) + if (!ispnpidacpi(cid_list->ids[i].string)) continue; - pnp_add_id(dev, cid_list->id[i].value); + pnp_add_id(dev, cid_list->ids[i].string); } } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..b91420b52c6f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -173,17 +173,15 @@ struct acpi_device_dir { typedef char acpi_bus_id[8]; typedef unsigned long acpi_bus_address; -typedef char acpi_hardware_id[15]; -typedef char acpi_unique_id[9]; typedef char acpi_device_name[40]; typedef char acpi_device_class[20]; struct acpi_device_pnp { acpi_bus_id bus_id; /* Object name */ acpi_bus_address bus_address; /* _ADR */ - acpi_hardware_id hardware_id; /* _HID */ - struct acpi_compatible_id_list *cid_list; /* _CIDs */ - acpi_unique_id unique_id; /* _UID */ + char *hardware_id; /* _HID */ + struct acpica_device_id_list *cid_list; /* _CIDs */ + char *unique_id; /* _UID */ acpi_device_name device_name; /* 
Driver-determined */ acpi_device_class device_class; /* " */ }; diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index b450a195319a..04904c7f1aa1 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -200,7 +200,8 @@ acpi_evaluate_object_typed(acpi_handle object, acpi_object_type return_type); acpi_status -acpi_get_object_info(acpi_handle handle, struct acpi_buffer *return_buffer); +acpi_get_object_info(acpi_handle handle, + struct acpi_device_info **return_buffer); acpi_status acpi_install_method(u8 *buffer); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 37ba576d06e8..7a4ff79e238c 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -338,7 +338,7 @@ typedef u32 acpi_physical_address; /* PM Timer ticks per second (HZ) */ -#define PM_TIMER_FREQUENCY 3579545 +#define PM_TIMER_FREQUENCY 3579545 /******************************************************************************* * @@ -969,38 +969,60 @@ acpi_status(*acpi_walk_callback) (acpi_handle obj_handle, #define ACPI_INTERRUPT_NOT_HANDLED 0x00 #define ACPI_INTERRUPT_HANDLED 0x01 -/* Length of _HID, _UID, _CID, and UUID values */ +/* Length of 32-bit EISAID values when converted back to a string */ + +#define ACPI_EISAID_STRING_SIZE 8 /* Includes null terminator */ + +/* Length of UUID (string) values */ -#define ACPI_DEVICE_ID_LENGTH 0x09 -#define ACPI_MAX_CID_LENGTH 48 #define ACPI_UUID_LENGTH 16 -/* Common string version of device HIDs and UIDs */ +/* Structures used for device/processor HID, UID, CID */ struct acpica_device_id { - char value[ACPI_DEVICE_ID_LENGTH]; + u32 length; /* Length of string + null */ + char *string; }; -/* Common string version of device CIDs */ - -struct acpi_compatible_id { - char value[ACPI_MAX_CID_LENGTH]; +struct acpica_device_id_list { + u32 count; /* Number of IDs in Ids array */ + u32 list_size; /* Size of list, including ID strings */ + struct acpica_device_id ids[1]; /* ID array */ }; -struct acpi_compatible_id_list { - u32 count; - u32 size; - struct acpi_compatible_id id[1]; +/* + * Structure returned from acpi_get_object_info. 
+ * Optimized for both 32- and 64-bit builds + */ +struct acpi_device_info { + u32 info_size; /* Size of info, including ID strings */ + u32 name; /* ACPI object Name */ + acpi_object_type type; /* ACPI object Type */ + u8 param_count; /* If a method, required parameter count */ + u8 valid; /* Indicates which optional fields are valid */ + u8 flags; /* Miscellaneous info */ + u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ + u8 lowest_dstates[5]; /* _sx_w values: 0xFF indicates not valid */ + u32 current_status; /* _STA value */ + acpi_integer address; /* _ADR value */ + struct acpica_device_id hardware_id; /* _HID value */ + struct acpica_device_id unique_id; /* _UID value */ + struct acpica_device_id_list compatible_id_list; /* _CID list */ }; -/* Structure and flags for acpi_get_object_info */ +/* Values for Flags field above (acpi_get_object_info) */ + +#define ACPI_PCI_ROOT_BRIDGE 0x01 -#define ACPI_VALID_STA 0x0001 -#define ACPI_VALID_ADR 0x0002 -#define ACPI_VALID_HID 0x0004 -#define ACPI_VALID_UID 0x0008 -#define ACPI_VALID_CID 0x0010 -#define ACPI_VALID_SXDS 0x0020 +/* Flags for Valid field above (acpi_get_object_info) */ + +#define ACPI_VALID_STA 0x01 +#define ACPI_VALID_ADR 0x02 +#define ACPI_VALID_HID 0x04 +#define ACPI_VALID_UID 0x08 +#define ACPI_VALID_CID 0x10 +#define ACPI_VALID_SXDS 0x20 +#define ACPI_VALID_SXWS 0x40 /* Flags for _STA method */ @@ -1011,29 +1033,6 @@ struct acpi_compatible_id_list { #define ACPI_STA_DEVICE_OK 0x08 /* Synonym */ #define ACPI_STA_BATTERY_PRESENT 0x10 -#define ACPI_COMMON_OBJ_INFO \ - acpi_object_type type; /* ACPI object type */ \ - acpi_name name /* ACPI object Name */ - -struct acpi_obj_info_header { - ACPI_COMMON_OBJ_INFO; -}; - -/* Structure returned from Get Object Info */ - -struct acpi_device_info { - ACPI_COMMON_OBJ_INFO; - - u32 param_count; /* If a method, required parameter count */ - u32 valid; /* Indicates which fields below are valid */ - u32 current_status; /* _STA value */ - acpi_integer address; /* _ADR value if any */ - struct acpica_device_id hardware_id; /* _HID value if any */ - struct acpica_device_id unique_id; /* _UID value if any */ - u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ - struct acpi_compatible_id_list compatibility_id; /* List of _CIDs if any */ -}; - /* Context structs for address space handlers */ struct acpi_pci_id { -- cgit v1.2.3 From 6557a49a443a347d24aed58076365432ded30edc Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 24 Jun 2009 11:32:04 +0800 Subject: ACPICA: ACPI 4.0: Interpreter support for IPMI. Adds support for IPMI which is similar to SMBus and uses a bi-directional data buffer. ACPICA BZ 773. 
http://acpica.org/bugzilla/show_bug.cgi?id=773 Signed-off-by: Lin Ming Signed-off-by: Bob Moore Signed-off-by: Len Brown --- drivers/acpi/acpica/acconfig.h | 3 +- drivers/acpi/acpica/exfield.c | 82 ++++++++++++++++++++++++++++-------------- drivers/acpi/acpica/exfldio.c | 7 ++-- include/acpi/actypes.h | 3 +- 4 files changed, 63 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index 9123d5a11627..8e679ef5b231 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -199,9 +199,10 @@ #define ACPI_RSDP_CHECKSUM_LENGTH 20 #define ACPI_RSDP_XCHECKSUM_LENGTH 36 -/* SMBus bidirectional buffer size */ +/* SMBus and IPMI bidirectional buffer size */ #define ACPI_SMBUS_BUFFER_SIZE 34 +#define ACPI_IPMI_BUFFER_SIZE 66 /* _sx_d and _sx_w control methods */ diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index 546dcdd86785..0b33d6c887b9 100644 --- a/drivers/acpi/acpica/exfield.c +++ b/drivers/acpi/acpica/exfield.c @@ -72,6 +72,7 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, union acpi_operand_object *buffer_desc; acpi_size length; void *buffer; + u32 function; ACPI_FUNCTION_TRACE_PTR(ex_read_data_from_field, obj_desc); @@ -97,13 +98,27 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, } } else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) && (obj_desc->field.region_obj->region.space_id == - ACPI_ADR_SPACE_SMBUS)) { + ACPI_ADR_SPACE_SMBUS + || obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_IPMI)) { /* - * This is an SMBus read. We must create a buffer to hold the data - * and directly access the region handler. + * This is an SMBus or IPMI read. We must create a buffer to hold + * the data and then directly access the region handler. + * + * Note: Smbus protocol value is passed in upper 16-bits of Function */ - buffer_desc = - acpi_ut_create_buffer_object(ACPI_SMBUS_BUFFER_SIZE); + if (obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_SMBUS) { + length = ACPI_SMBUS_BUFFER_SIZE; + function = + ACPI_READ | (obj_desc->field.attribute << 16); + } else { /* IPMI */ + + length = ACPI_IPMI_BUFFER_SIZE; + function = ACPI_READ; + } + + buffer_desc = acpi_ut_create_buffer_object(length); if (!buffer_desc) { return_ACPI_STATUS(AE_NO_MEMORY); } @@ -112,16 +127,13 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, acpi_ex_acquire_global_lock(obj_desc->common_field.field_flags); - /* - * Perform the read. - * Note: Smbus protocol value is passed in upper 16-bits of Function - */ + /* Call the region handler for the read */ + status = acpi_ex_access_region(obj_desc, 0, ACPI_CAST_PTR(acpi_integer, buffer_desc-> buffer.pointer), - ACPI_READ | (obj_desc->field. - attribute << 16)); + function); acpi_ex_release_global_lock(obj_desc->common_field.field_flags); goto exit; } @@ -212,6 +224,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, u32 length; void *buffer; union acpi_operand_object *buffer_desc; + u32 function; ACPI_FUNCTION_TRACE_PTR(ex_write_data_to_field, obj_desc); @@ -234,39 +247,56 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, } } else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) && (obj_desc->field.region_obj->region.space_id == - ACPI_ADR_SPACE_SMBUS)) { + ACPI_ADR_SPACE_SMBUS + || obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_IPMI)) { /* - * This is an SMBus write. 
We will bypass the entire field mechanism - * and handoff the buffer directly to the handler. + * This is an SMBus or IPMI write. We will bypass the entire field + * mechanism and handoff the buffer directly to the handler. For + * these address spaces, the buffer is bi-directional; on a write, + * return data is returned in the same buffer. + * + * Source must be a buffer of sufficient size: + * ACPI_SMBUS_BUFFER_SIZE or ACPI_IPMI_BUFFER_SIZE. * - * Source must be a buffer of sufficient size (ACPI_SMBUS_BUFFER_SIZE). + * Note: SMBus protocol type is passed in upper 16-bits of Function */ if (source_desc->common.type != ACPI_TYPE_BUFFER) { ACPI_ERROR((AE_INFO, - "SMBus write requires Buffer, found type %s", + "SMBus or IPMI write requires Buffer, found type %s", acpi_ut_get_object_type_name(source_desc))); return_ACPI_STATUS(AE_AML_OPERAND_TYPE); } - if (source_desc->buffer.length < ACPI_SMBUS_BUFFER_SIZE) { + if (obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_SMBUS) { + length = ACPI_SMBUS_BUFFER_SIZE; + function = + ACPI_WRITE | (obj_desc->field.attribute << 16); + } else { /* IPMI */ + + length = ACPI_IPMI_BUFFER_SIZE; + function = ACPI_WRITE; + } + + if (source_desc->buffer.length < length) { ACPI_ERROR((AE_INFO, - "SMBus write requires Buffer of length %X, found length %X", - ACPI_SMBUS_BUFFER_SIZE, - source_desc->buffer.length)); + "SMBus or IPMI write requires Buffer of length %X, found length %X", + length, source_desc->buffer.length)); return_ACPI_STATUS(AE_AML_BUFFER_LIMIT); } - buffer_desc = - acpi_ut_create_buffer_object(ACPI_SMBUS_BUFFER_SIZE); + /* Create the bi-directional buffer */ + + buffer_desc = acpi_ut_create_buffer_object(length); if (!buffer_desc) { return_ACPI_STATUS(AE_NO_MEMORY); } buffer = buffer_desc->buffer.pointer; - ACPI_MEMCPY(buffer, source_desc->buffer.pointer, - ACPI_SMBUS_BUFFER_SIZE); + ACPI_MEMCPY(buffer, source_desc->buffer.pointer, length); /* Lock entire transaction if requested */ @@ -275,12 +305,10 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, /* * Perform the write (returns status and perhaps data in the * same buffer) - * Note: SMBus protocol type is passed in upper 16-bits of Function. */ status = acpi_ex_access_region(obj_desc, 0, (acpi_integer *) buffer, - ACPI_WRITE | (obj_desc->field. 
- attribute << 16)); + function); acpi_ex_release_global_lock(obj_desc->common_field.field_flags); *result_desc = buffer_desc; diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index 6687be167f5f..d7b3b418fb45 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -120,12 +120,13 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, } /* - * Exit now for SMBus address space, it has a non-linear address space + * Exit now for SMBus or IPMI address space, it has a non-linear address space * and the request cannot be directly validated */ - if (rgn_desc->region.space_id == ACPI_ADR_SPACE_SMBUS) { + if (rgn_desc->region.space_id == ACPI_ADR_SPACE_SMBUS || + rgn_desc->region.space_id == ACPI_ADR_SPACE_IPMI) { - /* SMBus has a non-linear address space */ + /* SMBus or IPMI has a non-linear address space */ return_ACPI_STATUS(AE_OK); } diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 7a4ff79e238c..4371805d2def 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -732,7 +732,8 @@ typedef u8 acpi_adr_space_type; #define ACPI_ADR_SPACE_SMBUS (acpi_adr_space_type) 4 #define ACPI_ADR_SPACE_CMOS (acpi_adr_space_type) 5 #define ACPI_ADR_SPACE_PCI_BAR_TARGET (acpi_adr_space_type) 6 -#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 7 +#define ACPI_ADR_SPACE_IPMI (acpi_adr_space_type) 7 +#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 8 #define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 127 /* -- cgit v1.2.3 From 8e4319c425077c4cc540696a5bb6c4d12f017dcd Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 29 Jun 2009 13:43:27 +0800 Subject: ACPICA: Fix several acpi_attach_data problems Handler was never invoked. Now invoked if/when host node is deleted. Data object was not automatically deleted when host node was deleted. Interface to handler had an unused parameter, removed it. ACPICA BZ 778. http://acpica.org/bugzilla/show_bug.cgi?id=778 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acnamesp.h | 2 + drivers/acpi/acpica/nsalloc.c | 88 +++++++++++++++++++++++++++++------------- drivers/acpi/acpica/nsload.c | 3 +- drivers/acpi/bus.c | 2 +- drivers/acpi/glue.c | 2 +- drivers/acpi/scan.c | 2 +- include/acpi/acpi_bus.h | 4 +- include/acpi/actypes.h | 2 +- 8 files changed, 70 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 94cdc2b8cb93..a78e02f62d5e 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -144,6 +144,8 @@ struct acpi_namespace_node *acpi_ns_create_node(u32 name); void acpi_ns_delete_node(struct acpi_namespace_node *node); +void acpi_ns_remove_node(struct acpi_namespace_node *node); + void acpi_ns_delete_namespace_subtree(struct acpi_namespace_node *parent_handle); diff --git a/drivers/acpi/acpica/nsalloc.c b/drivers/acpi/acpica/nsalloc.c index efc971ab7d65..8a58a1b85aa0 100644 --- a/drivers/acpi/acpica/nsalloc.c +++ b/drivers/acpi/acpica/nsalloc.c @@ -96,17 +96,68 @@ struct acpi_namespace_node *acpi_ns_create_node(u32 name) * * RETURN: None * - * DESCRIPTION: Delete a namespace node + * DESCRIPTION: Delete a namespace node. All node deletions must come through + * here. Detaches any attached objects, including any attached + * data. If a handler is associated with attached data, it is + * invoked before the node is deleted. 
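[ Illustration: with this fix a client's handler really does run when the host node is deleted, so it becomes a safe place to reclaim attached data. Hedged sketch only; the names and the kfree-based context are hypothetical. ]

    static void my_data_handler(acpi_handle object, void *data)
    {
            /* Invoked from acpi_ns_delete_node() just before the node is freed */
            kfree(data);
    }

    static acpi_status my_bind_context(acpi_handle handle, void *context)
    {
            return acpi_attach_data(handle, my_data_handler, context);
    }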
* ******************************************************************************/ void acpi_ns_delete_node(struct acpi_namespace_node *node) +{ + union acpi_operand_object *obj_desc; + + ACPI_FUNCTION_NAME(ns_delete_node); + + /* Detach an object if there is one */ + + acpi_ns_detach_object(node); + + /* + * Delete an attached data object if present (an object that was created + * and attached via acpi_attach_data). Note: After any normal object is + * detached above, the only possible remaining object is a data object. + */ + obj_desc = node->object; + if (obj_desc && (obj_desc->common.type == ACPI_TYPE_LOCAL_DATA)) { + + /* Invoke the attached data deletion handler if present */ + + if (obj_desc->data.handler) { + obj_desc->data.handler(node, obj_desc->data.pointer); + } + + acpi_ut_remove_reference(obj_desc); + } + + /* Now we can delete the node */ + + (void)acpi_os_release_object(acpi_gbl_namespace_cache, node); + + ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); + ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, "Node %p, Remaining %X\n", + node, acpi_gbl_current_node_count)); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ns_remove_node + * + * PARAMETERS: Node - Node to be removed/deleted + * + * RETURN: None + * + * DESCRIPTION: Remove (unlink) and delete a namespace node + * + ******************************************************************************/ + +void acpi_ns_remove_node(struct acpi_namespace_node *node) { struct acpi_namespace_node *parent_node; struct acpi_namespace_node *prev_node; struct acpi_namespace_node *next_node; - ACPI_FUNCTION_TRACE_PTR(ns_delete_node, node); + ACPI_FUNCTION_TRACE_PTR(ns_remove_node, node); parent_node = acpi_ns_get_parent_node(node); @@ -142,12 +193,9 @@ void acpi_ns_delete_node(struct acpi_namespace_node *node) } } - ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); - - /* Detach an object if there is one, then delete the node */ + /* Delete the node and any attached objects */ - acpi_ns_detach_object(node); - (void)acpi_os_release_object(acpi_gbl_namespace_cache, node); + acpi_ns_delete_node(node); return_VOID; } @@ -273,25 +321,11 @@ void acpi_ns_delete_children(struct acpi_namespace_node *parent_node) parent_node, child_node)); } - /* Now we can free this child object */ - - ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); - - ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, - "Object %p, Remaining %X\n", child_node, - acpi_gbl_current_node_count)); - - /* Detach an object if there is one, then free the child node */ - - acpi_ns_detach_object(child_node); - - /* Now we can delete the node */ - - (void)acpi_os_release_object(acpi_gbl_namespace_cache, - child_node); - - /* And move on to the next child in the list */ - + /* + * Delete this child node and move on to the next child in the list. + * No need to unlink the node since we are deleting the entire branch. 
+ */ + acpi_ns_delete_node(child_node); child_node = next_node; } while (!(flags & ANOBJ_END_OF_PEER_LIST)); @@ -433,7 +467,7 @@ void acpi_ns_delete_namespace_by_owner(acpi_owner_id owner_id) if (deletion_node) { acpi_ns_delete_children(deletion_node); - acpi_ns_delete_node(deletion_node); + acpi_ns_remove_node(deletion_node); deletion_node = NULL; } diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c index dcd7a6adbbbc..a7234e60e985 100644 --- a/drivers/acpi/acpica/nsload.c +++ b/drivers/acpi/acpica/nsload.c @@ -270,8 +270,7 @@ static acpi_status acpi_ns_delete_subtree(acpi_handle start_handle) /* Now delete the starting object, and we are done */ - acpi_ns_delete_node(child_handle); - + acpi_ns_remove_node(child_handle); return_ACPI_STATUS(AE_OK); } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 2876fc70c3a9..620183f13e5e 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -141,7 +141,7 @@ int acpi_bus_get_status(struct acpi_device *device) EXPORT_SYMBOL(acpi_bus_get_status); void acpi_bus_private_data_handler(acpi_handle handle, - u32 function, void *context) + void *context) { return; } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 27a7072347ea..9a4ce33f137e 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -119,7 +119,7 @@ EXPORT_SYMBOL(acpi_get_child); /* Link ACPI devices with physical devices */ static void acpi_glue_data_handler(acpi_handle handle, - u32 function, void *context) + void *context) { /* we provide an empty handler */ } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 0ab526de7c55..9606af13d3b8 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -687,7 +687,7 @@ acpi_bus_get_ejd(acpi_handle handle, acpi_handle *ejd) } EXPORT_SYMBOL_GPL(acpi_bus_get_ejd); -void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context) +void acpi_bus_data_handler(acpi_handle handle, void *context) { /* TBD */ diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b91420b52c6f..6e83a68fbd7b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -312,7 +312,7 @@ struct acpi_bus_event { extern struct kobject *acpi_kobj; extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int); -void acpi_bus_private_data_handler(acpi_handle, u32, void *); +void acpi_bus_private_data_handler(acpi_handle, void *); int acpi_bus_get_private_data(acpi_handle, void **); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); @@ -325,7 +325,7 @@ extern void unregister_acpi_bus_notifier(struct notifier_block *nb); */ int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device); -void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context); +void acpi_bus_data_handler(acpi_handle handle, void *context); int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_get_power(acpi_handle handle, int *state); int acpi_bus_set_power(acpi_handle handle, int state); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 4371805d2def..ef4601149f49 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -922,7 +922,7 @@ typedef void (*acpi_notify_handler) (acpi_handle device, u32 value, void *context); typedef -void (*acpi_object_handler) (acpi_handle object, u32 function, void *data); +void (*acpi_object_handler) (acpi_handle object, void *data); typedef acpi_status(*acpi_init_handler) (acpi_handle object, u32 function); -- cgit v1.2.3 From 
eb2289ba1ba994de25af0d94b5e80ba93d2c1c3c Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 24 Jun 2009 13:42:00 +0800 Subject: ACPICA: ACPI 4.0: Changes for existing ACPI tables. FACS: new flag and new OspmFlags field. SRAT: x2APIC - add ClockDomain field to descriptor #2 Includes header and disassembler support. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 19 +++++++++++++------ include/acpi/actbl1.h | 8 +++++--- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 222733d01f36..0649a5670026 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -161,17 +161,24 @@ struct acpi_table_facs { u32 flags; u64 xfirmware_waking_vector; /* 64-bit version of the Firmware Waking Vector (ACPI 2.0+) */ u8 version; /* Version of this table (ACPI 2.0+) */ - u8 reserved[31]; /* Reserved, must be zero */ + u8 reserved[3]; /* Reserved, must be zero */ + u32 ospm_flags; /* Flags to be set by OSPM (ACPI 4.0) */ + u8 reserved1[24]; /* Reserved, must be zero */ }; -/* Flag macros */ +/* global_lock flags */ + +#define ACPI_GLOCK_PENDING (1) /* 00: Pending global lock ownership */ +#define ACPI_GLOCK_OWNED (1<<1) /* 01: Global lock is owned */ + +/* Flags */ -#define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ +#define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ +#define ACPI_FACS_64BIT_WAKE (1<<1) /* 01: 64-bit wake vector supported (ACPI 4.0) */ -/* Global lock flags */ +/* ospm_flags */ -#define ACPI_GLOCK_PENDING 0x01 /* 00: Pending global lock ownership */ -#define ACPI_GLOCK_OWNED 0x02 /* 01: Global lock is owned */ +#define ACPI_FACS_64BIT_ENVIRONMENT (1) /* 00: 64-bit wake environment is required (ACPI 4.0) */ /******************************************************************************* * diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 59ade0752473..ec36693f868c 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1011,7 +1011,7 @@ struct acpi_madt_interrupt_source { #define ACPI_MADT_CPEI_OVERRIDE (1) -/* 9: Processor Local X2_APIC (07/2008) */ +/* 9: Processor Local X2APIC (ACPI 4.0) */ struct acpi_madt_local_x2apic { struct acpi_subtable_header header; @@ -1021,7 +1021,7 @@ struct acpi_madt_local_x2apic { u32 uid; /* ACPI processor UID */ }; -/* 10: Local X2APIC NMI (07/2008) */ +/* 10: Local X2APIC NMI (ACPI 4.0) */ struct acpi_madt_local_x2apic_nmi { struct acpi_subtable_header header; @@ -1211,7 +1211,7 @@ struct acpi_srat_mem_affinity { #define ACPI_SRAT_MEM_HOT_PLUGGABLE (1<<1) /* 01: Memory region is hot pluggable */ #define ACPI_SRAT_MEM_NON_VOLATILE (1<<2) /* 02: Memory region is non-volatile */ -/* 2: Processor Local X2_APIC Affinity (07/2008) */ +/* 2: Processor Local X2_APIC Affinity (ACPI 4.0) */ struct acpi_srat_x2apic_cpu_affinity { struct acpi_subtable_header header; @@ -1219,6 +1219,8 @@ struct acpi_srat_x2apic_cpu_affinity { u32 proximity_domain; u32 apic_id; u32 flags; + u32 clock_domain; + u32 reserved2; }; /* Flags for struct acpi_srat_cpu_affinity and struct acpi_srat_x2apic_cpu_affinity */ -- cgit v1.2.3 From 3ce804ed83827a7fd27190836f9421b29ac64512 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 25 Jun 2009 10:31:32 -0700 Subject: ACPICA: Update version to 20090625 Update version number. 
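[ Aside: the ACPI_CA_VERSION word changed below stores the release date as hex nibbles that read as decimal digits, so a plain %x format recovers it; throwaway illustration, not kernel code. ]

    #include <stdio.h>

    int main(void)
    {
            unsigned int v = 0x20090625;    /* ACPI_CA_VERSION after this patch */

            /* Hex nibbles read as decimal digits: prints 2009.06.25 */
            printf("%04x.%02x.%02x\n", v >> 16, (v >> 8) & 0xff, v & 0xff);
            return 0;
    }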
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 04904c7f1aa1..063e577e791e 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090521 +#define ACPI_CA_VERSION 0x20090625 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3 From a5fe1a03f7720b8da8364a1737e1e5a357904e99 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 13 Aug 2009 10:43:27 +0800 Subject: ACPICA: fix leak of acpi_os_validate_address http://bugzilla.kernel.org/show_bug.cgi?id=13620 If the dynamic region is created and added to resource list over and over again, it has the potential to be a memory leak by growing the list every time. This patch fixes the memory leak, as below 1) add a new field "count" to struct acpi_res_list. When inserting, if the region(addr, len) is already in the resource list, we just increase "count", otherwise, the region is inserted with count=1. When deleting, the "count" is decreased, if it's decreased to 0, the region is deleted from the resource list. With "count", the region with same address and length can only be inserted to the resource list once, so prevent potential memory leak. 2) add a new function acpi_os_invalidate_address, which is called when region is deleted. Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/utdelete.c | 6 +++ drivers/acpi/osl.c | 94 ++++++++++++++++++++++++++++++++++++++++-- include/acpi/acpiosxf.h | 3 ++ 3 files changed, 100 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c index bc1710315088..96e26e70c63d 100644 --- a/drivers/acpi/acpica/utdelete.c +++ b/drivers/acpi/acpica/utdelete.c @@ -215,6 +215,12 @@ static void acpi_ut_delete_internal_obj(union acpi_operand_object *object) ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, "***** Region %p\n", object)); + /* Invalidate the region address/length via the host OS */ + + acpi_os_invalidate_address(object->region.space_id, + object->region.address, + (acpi_size) object->region.length); + second_desc = acpi_ns_get_secondary_object(object); if (second_desc) { /* diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 5691f165a952..c5b4f1ed9b71 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -88,6 +88,7 @@ struct acpi_res_list { char name[5]; /* only can have a length of 4 chars, make use of this one instead of res->name, no need to kalloc then */ struct list_head resource_list; + int count; }; static LIST_HEAD(resource_list_head); @@ -1358,6 +1359,89 @@ acpi_os_validate_interface (char *interface) return AE_SUPPORT; } +static inline int acpi_res_list_add(struct acpi_res_list *res) +{ + struct acpi_res_list *res_list_elem; + + list_for_each_entry(res_list_elem, &resource_list_head, + resource_list) { + + if (res->resource_type == res_list_elem->resource_type && + res->start == res_list_elem->start && + res->end == res_list_elem->end) { + + /* + * The Region(addr,len) already exist in the list, + * just increase the count + */ + + res_list_elem->count++; + return 0; + } + } + + res->count = 1; + list_add(&res->resource_list, &resource_list_head); + return 1; +} + +static inline void acpi_res_list_del(struct acpi_res_list *res) +{ + struct acpi_res_list *res_list_elem; + + list_for_each_entry(res_list_elem, 
&resource_list_head, + resource_list) { + + if (res->resource_type == res_list_elem->resource_type && + res->start == res_list_elem->start && + res->end == res_list_elem->end) { + + /* + * If the res count is decreased to 0, + * remove and free it + */ + + if (--res_list_elem->count == 0) { + list_del(&res_list_elem->resource_list); + kfree(res_list_elem); + } + return; + } + } +} + +acpi_status +acpi_os_invalidate_address( + u8 space_id, + acpi_physical_address address, + acpi_size length) +{ + struct acpi_res_list res; + + switch (space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + /* Only interference checks against SystemIO and SytemMemory + are needed */ + res.start = address; + res.end = address + length - 1; + res.resource_type = space_id; + spin_lock(&acpi_res_lock); + acpi_res_list_del(&res); + spin_unlock(&acpi_res_lock); + break; + case ACPI_ADR_SPACE_PCI_CONFIG: + case ACPI_ADR_SPACE_EC: + case ACPI_ADR_SPACE_SMBUS: + case ACPI_ADR_SPACE_CMOS: + case ACPI_ADR_SPACE_PCI_BAR_TARGET: + case ACPI_ADR_SPACE_DATA_TABLE: + case ACPI_ADR_SPACE_FIXED_HARDWARE: + break; + } + return AE_OK; +} + /****************************************************************************** * * FUNCTION: acpi_os_validate_address @@ -1382,6 +1466,7 @@ acpi_os_validate_address ( char *name) { struct acpi_res_list *res; + int added; if (acpi_enforce_resources == ENFORCE_RESOURCES_NO) return AE_OK; @@ -1399,14 +1484,17 @@ acpi_os_validate_address ( res->end = address + length - 1; res->resource_type = space_id; spin_lock(&acpi_res_lock); - list_add(&res->resource_list, &resource_list_head); + added = acpi_res_list_add(res); spin_unlock(&acpi_res_lock); - pr_debug("Added %s resource: start: 0x%llx, end: 0x%llx, " - "name: %s\n", (space_id == ACPI_ADR_SPACE_SYSTEM_IO) + pr_debug("%s %s resource: start: 0x%llx, end: 0x%llx, " + "name: %s\n", added ? "Added" : "Already exist", + (space_id == ACPI_ADR_SPACE_SYSTEM_IO) ? "SystemIO" : "System Memory", (unsigned long long)res->start, (unsigned long long)res->end, res->name); + if (!added) + kfree(res); break; case ACPI_ADR_SPACE_PCI_CONFIG: case ACPI_ADR_SPACE_EC: diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index ab0b85cf21f3..eb0e7189075f 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -245,6 +245,9 @@ acpi_status acpi_osi_invalidate(char* interface); acpi_status acpi_os_validate_address(u8 space_id, acpi_physical_address address, acpi_size length, char *name); +acpi_status +acpi_os_invalidate_address(u8 space_id, acpi_physical_address address, + acpi_size length); u64 acpi_os_get_timer(void); -- cgit v1.2.3 From f726f30e32305a34a203ff975e60885aa7556c6a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:24 +0000 Subject: dma: Add set_dma_mask hook to struct dma_map_ops POWERPC needs this hook. SPARC could use it too. 
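[ Illustration: a sketch of how an architecture could route dma_set_mask() through the new hook. The dispatch below is an assumption for illustration, modeled on the existing dma_supported hook; it is not part of this patch. ]

    int dma_set_mask(struct device *dev, u64 mask)
    {
            struct dma_map_ops *ops = get_dma_ops(dev);

            /* Prefer the per-bus hook when the architecture supplies one */
            if (ops && ops->set_dma_mask)
                    return ops->set_dma_mask(dev, mask);

            if (!dev->dma_mask || !dma_supported(dev, mask))
                    return -EIO;

            *dev->dma_mask = mask;
            return 0;
    }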
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- include/linux/dma-mapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index c0f6c3cd788c..91b761846061 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -58,6 +58,7 @@ struct dma_map_ops { enum dma_data_direction dir); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); + int (*set_dma_mask)(struct device *dev, u64 mask); int is_phys; }; -- cgit v1.2.3 From 468de9e54a900559b55aa939a4daeaea1915e572 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 27 Aug 2009 12:07:40 -0400 Subject: nfsd41: expand solo sequence check Compounds consisting of only a sequence operation don't need any additional caching beyond the sequence information we store in the slot entry. Fix nfsd4_is_solo_sequence to identify this case correctly. The additional check for a failed sequence in nfsd4_store_cache_entry() is redundant, since the nfsd4_is_solo_sequence call lower down catches this case. The final ce_cachethis set in nfsd4_sequence is also redundant. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 --------- include/linux/nfsd/xdr4.h | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5f634d24861c..b44a2cfde6f1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -991,16 +991,10 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; struct svc_rqst *rqstp = resp->rqstp; - struct nfsd4_compoundargs *args = rqstp->rq_argp; - struct nfsd4_op *op = &args->ops[resp->opcnt]; struct kvec *resv = &rqstp->rq_res.head[0]; dprintk("--> %s entry %p\n", __func__, entry); - /* Don't cache a failed OP_SEQUENCE. */ - if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) - return; - nfsd4_release_respages(entry->ce_respages, entry->ce_resused); entry->ce_opcnt = resp->opcnt; entry->ce_status = resp->cstate.status; @@ -1490,9 +1484,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, slot->sl_inuse = true; slot->sl_seqid = seq->seqid; slot->sl_cache_entry.ce_cachethis = seq->cachethis; - /* Always set the cache entry cachethis for solo sequence */ - if (nfsd4_is_solo_sequence(resp)) - slot->sl_cache_entry.ce_cachethis = 1; cstate->slot = slot; cstate->session = session; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 5e4beb0deb80..3f716607c86d 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -467,7 +467,7 @@ struct nfsd4_compoundres { static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) { struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; - return args->opcnt == 1; + return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; } static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) -- cgit v1.2.3 From 7285dd7fd375763bfb8ab1ac9cf3f1206f503c16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Aug 2009 20:25:24 +0200 Subject: clocksource: Resolve cpu hotplug dead lock with TSC unstable Martin Schwidefsky analyzed it: To register a clocksource the clocksource_mutex is acquired and if necessary timekeeping_notify is called to install the clocksource as the timekeeper clock. 
timekeeping_notify uses stop_machine which needs to take cpu_add_remove_lock mutex. Starting a new cpu is done with the cpu_add_remove_lock mutex held. native_cpu_up checks the tsc of the new cpu and if the tsc is no good clocksource_change_rating is called. Which needs the clocksource_mutex and the deadlock is complete. The solution is to replace the TSC via the clocksource watchdog mechanism. Mark the TSC as unstable and schedule the watchdog work so it gets removed in the watchdog thread context. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- arch/x86/kernel/tsc.c | 8 +++++--- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 968425422c46..fc3672a303d6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -767,12 +767,14 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); + printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else + clocksource_mark_unstable(&clocksource_tsc); + else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; clocksource_tsc.rating = 0; + } } } diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9ea40ff26f0e..83d2fbd81b93 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -277,6 +277,7 @@ extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); extern struct clocksource * __init __weak clocksource_default_clock(void); +extern void clocksource_mark_unstable(struct clocksource *cs); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0c86ad6e9fb..a0af4ffcb6e5 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -149,15 +149,42 @@ static void clocksource_watchdog_work(struct work_struct *work) kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); } -static void clocksource_unstable(struct clocksource *cs, int64_t delta) +static void __clocksource_unstable(struct clocksource *cs) { - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", - cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; schedule_work(&watchdog_work); } +static void clocksource_unstable(struct clocksource *cs, int64_t delta) +{ + printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", + cs->name, delta); + __clocksource_unstable(cs); +} + +/** + * clocksource_mark_unstable - mark clocksource unstable via watchdog + * @cs: clocksource to be marked unstable + * + * This function is called instead of clocksource_change_rating from + * cpu hotplug code to avoid a deadlock between the clocksource mutex + * and the cpu hotplug mutex. It defers the update of the clocksource + * to the watchdog thread. 
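[ Illustration of the two lock chains from the analysis above; not part of the patch. ]

    cpu_up():                            clocksource registration:
      cpu_add_remove_lock                  clocksource_mutex
        -> clocksource_change_rating()       -> timekeeping_notify()
           -> clocksource_mutex                 -> stop_machine()
              (blocks)                             -> cpu_add_remove_lock
                                                      (blocks)

    clocksource_mark_unstable() breaks the cycle: the hotplug path now takes
    only watchdog_lock and defers the rating change to the watchdog thread,
    which runs outside cpu_add_remove_lock.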
+ */ +void clocksource_mark_unstable(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) { + if (list_empty(&cs->wd_list)) + list_add(&cs->wd_list, &watchdog_list); + __clocksource_unstable(cs); + } + spin_unlock_irqrestore(&watchdog_lock, flags); +} + static void clocksource_watchdog(unsigned long data) { struct clocksource *cs; -- cgit v1.2.3 From b24aad44438d5bc21cbbfb94a99d9bf710d8295b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 24 Jul 2009 13:30:17 +0800 Subject: ACPICA: Split large ACPI table header Split out the non-acpi-defined ACPI tables into the existing (but empty) actbl2.h file. Preparation for new ACPI 4.0 tables. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 35 +-- include/acpi/actbl1.h | 553 +---------------------------------------------- include/acpi/actbl2.h | 585 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 611 insertions(+), 562 deletions(-) create mode 100644 include/acpi/actbl2.h (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 0649a5670026..55fcfc6725b2 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -44,6 +44,19 @@ #ifndef __ACTBL_H__ #define __ACTBL_H__ +/******************************************************************************* + * + * Fundamental ACPI tables + * + * This file contains definitions for the ACPI tables that are directly consumed + * by ACPICA. All other tables are consumed by the OS-dependent ACPI-related + * device drivers and other OS support code. + * + * The RSDP and FACS do not use the common ACPI table header. All other ACPI + * tables use the header. + * + ******************************************************************************/ + /* * Values for description table header signatures. Useful because they make * it more difficult to inadvertently type in the wrong signature. @@ -65,11 +78,6 @@ #pragma pack(1) /* - * These are the ACPI tables that are directly consumed by the subsystem. - * - * The RSDP and FACS do not use the common ACPI table header. All other ACPI - * tables use the header. - * * Note about bitfields: The u8 type is used for bitfields in ACPI tables. * This is the only type that is even remotely portable. Anything else is not * portable, so do not use any other bitfield types. @@ -77,9 +85,8 @@ /******************************************************************************* * - * ACPI Table Header. This common header is used by all tables except the - * RSDP and FACS. The define is used for direct inclusion of header into - * other ACPI tables + * Master ACPI Table Header. This common header is used by all ACPI tables + * except the RSDP and FACS. * ******************************************************************************/ @@ -95,13 +102,16 @@ struct acpi_table_header { u32 asl_compiler_revision; /* ASL compiler version */ }; -/* +/******************************************************************************* + * * GAS - Generic Address Structure (ACPI 2.0+) * * Note: Since this structure is used in the ACPI tables, it is byte aligned. - * If misalignment is not supported, access to the Address field must be - * performed with care. - */ + * If misaliged access is not supported by the hardware, accesses to the + * 64-bit Address field must be performed with care. 
+ * + ******************************************************************************/ + struct acpi_generic_address { u8 space_id; /* Address space where struct or register exists */ u8 bit_width; /* Size in bits of given register */ u8 bit_offset; /* Bit offset within the register */ u8 access_width; /* Minimum Access size (ACPI 3.0) */ u64 address; /* 64-bit address of struct or register */ }; @@ -325,5 +335,6 @@ struct acpi_table_desc { */ #include <acpi/actbl1.h> +#include <acpi/actbl2.h> #endif /* __ACTBL_H__ */ diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index ec36693f868c..582af1fcb8f5 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -46,41 +46,29 @@ /******************************************************************************* * - * Additional ACPI Tables + * Additional ACPI Tables (1) * * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * + * The tables in this file are fully defined within the ACPI specification. + * ******************************************************************************/ /* * Values for description table header signatures. Useful because they make * it more difficult to inadvertently type in the wrong signature. */ -#define ACPI_SIG_ASF "ASF!" /* Alert Standard Format table */ #define ACPI_SIG_BERT "BERT" /* Boot Error Record Table */ -#define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ #define ACPI_SIG_CPEP "CPEP" /* Corrected Platform Error Polling table */ -#define ACPI_SIG_DBGP "DBGP" /* Debug Port table */ -#define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ #define ACPI_SIG_ECDT "ECDT" /* Embedded Controller Boot Resources Table */ #define ACPI_SIG_EINJ "EINJ" /* Error Injection table */ #define ACPI_SIG_ERST "ERST" /* Error Record Serialization Table */ #define ACPI_SIG_HEST "HEST" /* Hardware Error Source Table */ -#define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ -#define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ #define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */ -#define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ -#define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ #define ACPI_SIG_SLIT "SLIT" /* System Locality Distance Information Table */ -#define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ -#define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_SRAT "SRAT" /* System Resource Affinity Table */ -#define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ -#define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ -#define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ -#define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ /* * All tables must be byte-packed to match the ACPI specification, since @@ -113,115 +101,6 @@ struct acpi_whea_header { u64 mask; /* Bitmask required for this register instruction */ }; -/******************************************************************************* - * - * ASF - Alert Standard Format table (Signature "ASF!") - * - * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 - * - ******************************************************************************/ - -struct acpi_table_asf { - struct acpi_table_header header; /* Common ACPI table header */ -}; - -/* ASF subtable header */ - -struct acpi_asf_header { - u8 type; - u8 reserved; - u16 length; -}; - -/* Values for Type field above */ - -enum acpi_asf_type { - ACPI_ASF_TYPE_INFO = 0, - ACPI_ASF_TYPE_ALERT = 1, - ACPI_ASF_TYPE_CONTROL = 2, -
ACPI_ASF_TYPE_BOOT = 3, - ACPI_ASF_TYPE_ADDRESS = 4, - ACPI_ASF_TYPE_RESERVED = 5 -}; - -/* - * ASF subtables - */ - -/* 0: ASF Information */ - -struct acpi_asf_info { - struct acpi_asf_header header; - u8 min_reset_value; - u8 min_poll_interval; - u16 system_id; - u32 mfg_id; - u8 flags; - u8 reserved2[3]; -}; - -/* 1: ASF Alerts */ - -struct acpi_asf_alert { - struct acpi_asf_header header; - u8 assert_mask; - u8 deassert_mask; - u8 alerts; - u8 data_length; -}; - -struct acpi_asf_alert_data { - u8 address; - u8 command; - u8 mask; - u8 value; - u8 sensor_type; - u8 type; - u8 offset; - u8 source_type; - u8 severity; - u8 sensor_number; - u8 entity; - u8 instance; -}; - -/* 2: ASF Remote Control */ - -struct acpi_asf_remote { - struct acpi_asf_header header; - u8 controls; - u8 data_length; - u16 reserved2; -}; - -struct acpi_asf_control_data { - u8 function; - u8 address; - u8 command; - u8 value; -}; - -/* 3: ASF RMCP Boot Options */ - -struct acpi_asf_rmcp { - struct acpi_asf_header header; - u8 capabilities[7]; - u8 completion_code; - u32 enterprise_id; - u8 command; - u16 parameter; - u16 boot_options; - u16 oem_parameters; -}; - -/* 4: ASF Address */ - -struct acpi_asf_address { - struct acpi_asf_header header; - u8 eprom_address; - u8 devices; -}; - /******************************************************************************* * * BERT - Boot Error Record Table @@ -251,18 +130,6 @@ struct acpi_bert_region { #define ACPI_BERT_MULTIPLE_UNCORRECTABLE (4) #define ACPI_BERT_MULTIPLE_CORRECTABLE (8) -/******************************************************************************* - * - * BOOT - Simple Boot Flag Table - * - ******************************************************************************/ - -struct acpi_table_boot { - struct acpi_table_header header; /* Common ACPI table header */ - u8 cmos_index; /* Index in CMOS RAM for the boot register */ - u8 reserved[3]; -}; - /******************************************************************************* * * CPEP - Corrected Platform Error Polling table @@ -284,123 +151,6 @@ struct acpi_cpep_polling { u32 interval; /* Polling interval (msec) */ }; -/******************************************************************************* - * - * DBGP - Debug Port table - * - ******************************************************************************/ - -struct acpi_table_dbgp { - struct acpi_table_header header; /* Common ACPI table header */ - u8 type; /* 0=full 16550, 1=subset of 16550 */ - u8 reserved[3]; - struct acpi_generic_address debug_port; -}; - -/******************************************************************************* - * - * DMAR - DMA Remapping table - * From "Intel Virtualization Technology for Directed I/O", Sept. 
2007 - * - ******************************************************************************/ - -struct acpi_table_dmar { - struct acpi_table_header header; /* Common ACPI table header */ - u8 width; /* Host Address Width */ - u8 flags; - u8 reserved[10]; -}; - -/* Flags */ - -#define ACPI_DMAR_INTR_REMAP (1) - -/* DMAR subtable header */ - -struct acpi_dmar_header { - u16 type; - u16 length; -}; - -/* Values for subtable type in struct acpi_dmar_header */ - -enum acpi_dmar_type { - ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, - ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, - ACPI_DMAR_TYPE_ATSR = 2, - ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ -}; - -struct acpi_dmar_device_scope { - u8 entry_type; - u8 length; - u16 reserved; - u8 enumeration_id; - u8 bus; -}; - -/* Values for entry_type in struct acpi_dmar_device_scope */ - -enum acpi_dmar_scope_type { - ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, - ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, - ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, - ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, - ACPI_DMAR_SCOPE_TYPE_HPET = 4, - ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ -}; - -struct acpi_dmar_pci_path { - u8 dev; - u8 fn; -}; - -/* - * DMAR Sub-tables, correspond to Type in struct acpi_dmar_header - */ - -/* 0: Hardware Unit Definition */ - -struct acpi_dmar_hardware_unit { - struct acpi_dmar_header header; - u8 flags; - u8 reserved; - u16 segment; - u64 address; /* Register Base Address */ -}; - -/* Flags */ - -#define ACPI_DMAR_INCLUDE_ALL (1) - -/* 1: Reserved Memory Defininition */ - -struct acpi_dmar_reserved_memory { - struct acpi_dmar_header header; - u16 reserved; - u16 segment; - u64 base_address; /* 4_k aligned base address */ - u64 end_address; /* 4_k aligned limit address */ -}; - -/* Flags */ - -#define ACPI_DMAR_ALLOW_ALL (1) - - -/* 2: Root Port ATS Capability Reporting Structure */ - -struct acpi_dmar_atsr { - struct acpi_dmar_header header; - u8 flags; - u8 reserved; - u16 segment; -}; - -/* Flags */ - -#define ACPI_DMAR_ALL_PORTS (1) - /******************************************************************************* * * ECDT - Embedded Controller Boot Resources Table @@ -762,119 +512,6 @@ struct acpi_hest_generic { u32 error_status_block_length; }; -/******************************************************************************* - * - * HPET - High Precision Event Timer table - * - ******************************************************************************/ - -struct acpi_table_hpet { - struct acpi_table_header header; /* Common ACPI table header */ - u32 id; /* Hardware ID of event timer block */ - struct acpi_generic_address address; /* Address of event timer block */ - u8 sequence; /* HPET sequence number */ - u16 minimum_tick; /* Main counter min tick, periodic mode */ - u8 flags; -}; - -/*! Flags */ - -#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ -#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ -#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ - -/*! 
[End] no source code translation !*/ - -/******************************************************************************* - * - * IBFT - Boot Firmware Table - * - ******************************************************************************/ - -struct acpi_table_ibft { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved[12]; -}; - -/* IBFT common subtable header */ - -struct acpi_ibft_header { - u8 type; - u8 version; - u16 length; - u8 index; - u8 flags; -}; - -/* Values for Type field above */ - -enum acpi_ibft_type { - ACPI_IBFT_TYPE_NOT_USED = 0, - ACPI_IBFT_TYPE_CONTROL = 1, - ACPI_IBFT_TYPE_INITIATOR = 2, - ACPI_IBFT_TYPE_NIC = 3, - ACPI_IBFT_TYPE_TARGET = 4, - ACPI_IBFT_TYPE_EXTENSIONS = 5, - ACPI_IBFT_TYPE_RESERVED = 6 /* 6 and greater are reserved */ -}; - -/* IBFT subtables */ - -struct acpi_ibft_control { - struct acpi_ibft_header header; - u16 extensions; - u16 initiator_offset; - u16 nic0_offset; - u16 target0_offset; - u16 nic1_offset; - u16 target1_offset; -}; - -struct acpi_ibft_initiator { - struct acpi_ibft_header header; - u8 sns_server[16]; - u8 slp_server[16]; - u8 primary_server[16]; - u8 secondary_server[16]; - u16 name_length; - u16 name_offset; -}; - -struct acpi_ibft_nic { - struct acpi_ibft_header header; - u8 ip_address[16]; - u8 subnet_mask_prefix; - u8 origin; - u8 gateway[16]; - u8 primary_dns[16]; - u8 secondary_dns[16]; - u8 dhcp[16]; - u16 vlan; - u8 mac_address[6]; - u16 pci_address; - u16 name_length; - u16 name_offset; -}; - -struct acpi_ibft_target { - struct acpi_ibft_header header; - u8 target_ip_address[16]; - u16 target_ip_socket; - u8 target_boot_lun[8]; - u8 chap_type; - u8 nic_association; - u16 target_name_length; - u16 target_name_offset; - u16 chap_name_length; - u16 chap_name_offset; - u16 chap_secret_length; - u16 chap_secret_offset; - u16 reverse_chap_name_length; - u16 reverse_chap_name_offset; - u16 reverse_chap_secret_length; - u16 reverse_chap_secret_offset; -}; - /******************************************************************************* * * MADT - Multiple APIC Description Table @@ -1056,27 +693,6 @@ struct acpi_madt_local_x2apic_nmi { #define ACPI_MADT_TRIGGER_RESERVED (2<<2) #define ACPI_MADT_TRIGGER_LEVEL (3<<2) -/******************************************************************************* - * - * MCFG - PCI Memory Mapped Configuration table and sub-table - * - ******************************************************************************/ - -struct acpi_table_mcfg { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved[8]; -}; - -/* Subtable */ - -struct acpi_mcfg_allocation { - u64 address; /* Base address, processor-relative */ - u16 pci_segment; /* PCI segment group number */ - u8 start_bus_number; /* Starting PCI Bus number */ - u8 end_bus_number; /* Final PCI Bus number */ - u32 reserved; -}; - /******************************************************************************* * * SBST - Smart Battery Specification Table @@ -1102,59 +718,6 @@ struct acpi_table_slit { u8 entry[1]; /* Real size = localities^2 */ }; -/******************************************************************************* - * - * SPCR - Serial Port Console Redirection table - * - ******************************************************************************/ - -struct acpi_table_spcr { - struct acpi_table_header header; /* Common ACPI table header */ - u8 interface_type; /* 0=full 16550, 1=subset of 16550 */ - u8 reserved[3]; - struct acpi_generic_address serial_port; - u8 interrupt_type; - u8 
pc_interrupt; - u32 interrupt; - u8 baud_rate; - u8 parity; - u8 stop_bits; - u8 flow_control; - u8 terminal_type; - u8 reserved1; - u16 pci_device_id; - u16 pci_vendor_id; - u8 pci_bus; - u8 pci_device; - u8 pci_function; - u32 pci_flags; - u8 pci_segment; - u32 reserved2; -}; - -/******************************************************************************* - * - * SPMI - Server Platform Management Interface table - * - ******************************************************************************/ - -struct acpi_table_spmi { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved; - u8 interface_type; - u16 spec_revision; /* Version of IPMI */ - u8 interrupt_type; - u8 gpe_number; /* GPE assigned */ - u8 reserved1; - u8 pci_device_flag; - u32 interrupt; - struct acpi_generic_address ipmi_register; - u8 pci_segment; - u8 pci_bus; - u8 pci_device; - u8 pci_function; -}; - /******************************************************************************* * * SRAT - System Resource Affinity Table @@ -1227,116 +790,6 @@ struct acpi_srat_x2apic_cpu_affinity { #define ACPI_SRAT_CPU_ENABLED (1) /* 00: Use affinity structure */ -/******************************************************************************* - * - * TCPA - Trusted Computing Platform Alliance table - * - ******************************************************************************/ - -struct acpi_table_tcpa { - struct acpi_table_header header; /* Common ACPI table header */ - u16 reserved; - u32 max_log_length; /* Maximum length for the event log area */ - u64 log_address; /* Address of the event log area */ -}; - -/******************************************************************************* - * - * UEFI - UEFI Boot optimization Table - * - ******************************************************************************/ - -struct acpi_table_uefi { - struct acpi_table_header header; /* Common ACPI table header */ - u8 identifier[16]; /* UUID identifier */ - u16 data_offset; /* Offset of remaining data in table */ - u8 data; -}; - -/******************************************************************************* - * - * WDAT - Watchdog Action Table - * - ******************************************************************************/ - -struct acpi_table_wdat { - struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u16 pci_segment; /* PCI Segment number */ - u8 pci_bus; /* PCI Bus number */ - u8 pci_device; /* PCI Device number */ - u8 pci_function; /* PCI Function number */ - u8 reserved[3]; - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved2[3]; - u32 entries; /* Number of watchdog entries that follow */ -}; - -/* WDAT Instruction Entries (actions) */ - -struct acpi_wdat_entry { - struct acpi_whea_header whea_header; /* Common header for WHEA tables */ -}; - -/* Values for Action field above */ - -enum acpi_wdat_actions { - ACPI_WDAT_RESET = 1, - ACPI_WDAT_GET_CURRENT_COUNTDOWN = 4, - ACPI_WDAT_GET_COUNTDOWN = 5, - ACPI_WDAT_SET_COUNTDOWN = 6, - ACPI_WDAT_GET_RUNNING_STATE = 8, - ACPI_WDAT_SET_RUNNING_STATE = 9, - ACPI_WDAT_GET_STOPPED_STATE = 10, - ACPI_WDAT_SET_STOPPED_STATE = 11, - ACPI_WDAT_GET_REBOOT = 16, - ACPI_WDAT_SET_REBOOT = 17, - ACPI_WDAT_GET_SHUTDOWN = 18, - ACPI_WDAT_SET_SHUTDOWN = 19, - ACPI_WDAT_GET_STATUS = 32, - ACPI_WDAT_SET_STATUS = 33, - ACPI_WDAT_ACTION_RESERVED = 34 /* 34 and greater are 
reserved */ -}; - -/* Values for Instruction field above */ - -enum acpi_wdat_instructions { - ACPI_WDAT_READ_VALUE = 0, - ACPI_WDAT_READ_COUNTDOWN = 1, - ACPI_WDAT_WRITE_VALUE = 2, - ACPI_WDAT_WRITE_COUNTDOWN = 3, - ACPI_WDAT_INSTRUCTION_RESERVED = 4, /* 4 and greater are reserved */ - ACPI_WDAT_PRESERVE_REGISTER = 0x80 /* Except for this value */ -}; - -/******************************************************************************* - * - * WDRT - Watchdog Resource Table - * - ******************************************************************************/ - -struct acpi_table_wdrt { - struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u8 pci_segment; /* PCI Segment number */ - u8 pci_bus; /* PCI Bus number */ - u8 pci_device; /* PCI Device number */ - u8 pci_function; /* PCI Function number */ - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved[3]; - u32 entries; /* Number of watchdog entries that follow */ -}; - -/* Flags */ - -#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ - /* Reset to default packing */ #pragma pack() diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h new file mode 100644 index 000000000000..b271aba0e524 --- /dev/null +++ b/include/acpi/actbl2.h @@ -0,0 +1,585 @@ +#ifndef __ACTBL2_H__ +#define __ACTBL2_H__ + +/******************************************************************************* + * + * Additional ACPI Tables (2) + * + * These tables are not consumed directly by the ACPICA subsystem, but are + * included here to support device drivers and the AML disassembler. + * + * The tables in this file are defined by third-party specifications, and are + * not defined directly by the ACPI specification itself. + * + ******************************************************************************/ + +/* + * Values for description table header signatures. Useful because they make + * it more difficult to inadvertently type in the wrong signature. + */ +#define ACPI_SIG_ASF "ASF!" /* Alert Standard Format table */ +#define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ +#define ACPI_SIG_DBGP "DBGP" /* Debug Port table */ +#define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ +#define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ +#define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ +#define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ +#define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ +#define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ +#define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ +#define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ +#define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ +#define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ +#define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ + +/* + * All tables must be byte-packed to match the ACPI specification, since + * the tables are provided by the system BIOS. + */ +#pragma pack(1) + +/* + * Note about bitfields: The u8 type is used for bitfields in ACPI tables. + * This is the only type that is even remotely portable. Anything else is not + * portable, so do not use any other bitfield types. 
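+ *
+ * For example: with #pragma pack(1) in effect, sizeof(struct
+ * acpi_generic_address) is 12 bytes (four u8 fields plus a u64); with
+ * natural alignment most compilers would pad it to 16, and the layout
+ * would no longer match the byte-packed tables the firmware provides.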
+ */ + +/******************************************************************************* + * + * ASF - Alert Standard Format table (Signature "ASF!") + * + * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 + * + ******************************************************************************/ + +struct acpi_table_asf { + struct acpi_table_header header; /* Common ACPI table header */ +}; + +/* ASF subtable header */ + +struct acpi_asf_header { + u8 type; + u8 reserved; + u16 length; +}; + +/* Values for Type field above */ + +enum acpi_asf_type { + ACPI_ASF_TYPE_INFO = 0, + ACPI_ASF_TYPE_ALERT = 1, + ACPI_ASF_TYPE_CONTROL = 2, + ACPI_ASF_TYPE_BOOT = 3, + ACPI_ASF_TYPE_ADDRESS = 4, + ACPI_ASF_TYPE_RESERVED = 5 +}; + +/* + * ASF subtables + */ + +/* 0: ASF Information */ + +struct acpi_asf_info { + struct acpi_asf_header header; + u8 min_reset_value; + u8 min_poll_interval; + u16 system_id; + u32 mfg_id; + u8 flags; + u8 reserved2[3]; +}; + +/* 1: ASF Alerts */ + +struct acpi_asf_alert { + struct acpi_asf_header header; + u8 assert_mask; + u8 deassert_mask; + u8 alerts; + u8 data_length; +}; + +struct acpi_asf_alert_data { + u8 address; + u8 command; + u8 mask; + u8 value; + u8 sensor_type; + u8 type; + u8 offset; + u8 source_type; + u8 severity; + u8 sensor_number; + u8 entity; + u8 instance; +}; + +/* 2: ASF Remote Control */ + +struct acpi_asf_remote { + struct acpi_asf_header header; + u8 controls; + u8 data_length; + u16 reserved2; +}; + +struct acpi_asf_control_data { + u8 function; + u8 address; + u8 command; + u8 value; +}; + +/* 3: ASF RMCP Boot Options */ + +struct acpi_asf_rmcp { + struct acpi_asf_header header; + u8 capabilities[7]; + u8 completion_code; + u32 enterprise_id; + u8 command; + u16 parameter; + u16 boot_options; + u16 oem_parameters; +}; + +/* 4: ASF Address */ + +struct acpi_asf_address { + struct acpi_asf_header header; + u8 eprom_address; + u8 devices; +}; + +/******************************************************************************* + * + * BOOT - Simple Boot Flag Table + * + ******************************************************************************/ + +struct acpi_table_boot { + struct acpi_table_header header; /* Common ACPI table header */ + u8 cmos_index; /* Index in CMOS RAM for the boot register */ + u8 reserved[3]; +}; + +/******************************************************************************* + * + * DBGP - Debug Port table + * + ******************************************************************************/ + +struct acpi_table_dbgp { + struct acpi_table_header header; /* Common ACPI table header */ + u8 type; /* 0=full 16550, 1=subset of 16550 */ + u8 reserved[3]; + struct acpi_generic_address debug_port; +}; + +/******************************************************************************* + * + * DMAR - DMA Remapping table + * From "Intel Virtualization Technology for Directed I/O", Sept. 
2007 + * + ******************************************************************************/ + +struct acpi_table_dmar { + struct acpi_table_header header; /* Common ACPI table header */ + u8 width; /* Host Address Width */ + u8 flags; + u8 reserved[10]; +}; + +/* Flags */ + +#define ACPI_DMAR_INTR_REMAP (1) + +/* DMAR subtable header */ + +struct acpi_dmar_header { + u16 type; + u16 length; +}; + +/* Values for subtable type in struct acpi_dmar_header */ + +enum acpi_dmar_type { + ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, + ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, + ACPI_DMAR_TYPE_ATSR = 2, + ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ +}; + +struct acpi_dmar_device_scope { + u8 entry_type; + u8 length; + u16 reserved; + u8 enumeration_id; + u8 bus; +}; + +/* Values for entry_type in struct acpi_dmar_device_scope */ + +enum acpi_dmar_scope_type { + ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, + ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, + ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, + ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, + ACPI_DMAR_SCOPE_TYPE_HPET = 4, + ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ +}; + +struct acpi_dmar_pci_path { + u8 dev; + u8 fn; +}; + +/* + * DMAR Sub-tables, correspond to Type in struct acpi_dmar_header + */ + +/* 0: Hardware Unit Definition */ + +struct acpi_dmar_hardware_unit { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; + u64 address; /* Register Base Address */ +}; + +/* Flags */ + +#define ACPI_DMAR_INCLUDE_ALL (1) + +/* 1: Reserved Memory Defininition */ + +struct acpi_dmar_reserved_memory { + struct acpi_dmar_header header; + u16 reserved; + u16 segment; + u64 base_address; /* 4_k aligned base address */ + u64 end_address; /* 4_k aligned limit address */ +}; + +/* Flags */ + +#define ACPI_DMAR_ALLOW_ALL (1) + +/* 2: Root Port ATS Capability Reporting Structure */ + +struct acpi_dmar_atsr { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; +}; + +/* Flags */ + +#define ACPI_DMAR_ALL_PORTS (1) + +/******************************************************************************* + * + * HPET - High Precision Event Timer table + * + ******************************************************************************/ + +struct acpi_table_hpet { + struct acpi_table_header header; /* Common ACPI table header */ + u32 id; /* Hardware ID of event timer block */ + struct acpi_generic_address address; /* Address of event timer block */ + u8 sequence; /* HPET sequence number */ + u16 minimum_tick; /* Main counter min tick, periodic mode */ + u8 flags; +}; + +/*! Flags */ + +#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ +#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ +#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ + +/*! 
[End] no source code translation !*/ + +/******************************************************************************* + * + * IBFT - Boot Firmware Table + * + ******************************************************************************/ + +struct acpi_table_ibft { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved[12]; +}; + +/* IBFT common subtable header */ + +struct acpi_ibft_header { + u8 type; + u8 version; + u16 length; + u8 index; + u8 flags; +}; + +/* Values for Type field above */ + +enum acpi_ibft_type { + ACPI_IBFT_TYPE_NOT_USED = 0, + ACPI_IBFT_TYPE_CONTROL = 1, + ACPI_IBFT_TYPE_INITIATOR = 2, + ACPI_IBFT_TYPE_NIC = 3, + ACPI_IBFT_TYPE_TARGET = 4, + ACPI_IBFT_TYPE_EXTENSIONS = 5, + ACPI_IBFT_TYPE_RESERVED = 6 /* 6 and greater are reserved */ +}; + +/* IBFT subtables */ + +struct acpi_ibft_control { + struct acpi_ibft_header header; + u16 extensions; + u16 initiator_offset; + u16 nic0_offset; + u16 target0_offset; + u16 nic1_offset; + u16 target1_offset; +}; + +struct acpi_ibft_initiator { + struct acpi_ibft_header header; + u8 sns_server[16]; + u8 slp_server[16]; + u8 primary_server[16]; + u8 secondary_server[16]; + u16 name_length; + u16 name_offset; +}; + +struct acpi_ibft_nic { + struct acpi_ibft_header header; + u8 ip_address[16]; + u8 subnet_mask_prefix; + u8 origin; + u8 gateway[16]; + u8 primary_dns[16]; + u8 secondary_dns[16]; + u8 dhcp[16]; + u16 vlan; + u8 mac_address[6]; + u16 pci_address; + u16 name_length; + u16 name_offset; +}; + +struct acpi_ibft_target { + struct acpi_ibft_header header; + u8 target_ip_address[16]; + u16 target_ip_socket; + u8 target_boot_lun[8]; + u8 chap_type; + u8 nic_association; + u16 target_name_length; + u16 target_name_offset; + u16 chap_name_length; + u16 chap_name_offset; + u16 chap_secret_length; + u16 chap_secret_offset; + u16 reverse_chap_name_length; + u16 reverse_chap_name_offset; + u16 reverse_chap_secret_length; + u16 reverse_chap_secret_offset; +}; + +/******************************************************************************* + * + * MCFG - PCI Memory Mapped Configuration table and sub-table + * + ******************************************************************************/ + +struct acpi_table_mcfg { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved[8]; +}; + +/* Subtable */ + +struct acpi_mcfg_allocation { + u64 address; /* Base address, processor-relative */ + u16 pci_segment; /* PCI segment group number */ + u8 start_bus_number; /* Starting PCI Bus number */ + u8 end_bus_number; /* Final PCI Bus number */ + u32 reserved; +}; + +/******************************************************************************* + * + * SPCR - Serial Port Console Redirection table + * + ******************************************************************************/ + +struct acpi_table_spcr { + struct acpi_table_header header; /* Common ACPI table header */ + u8 interface_type; /* 0=full 16550, 1=subset of 16550 */ + u8 reserved[3]; + struct acpi_generic_address serial_port; + u8 interrupt_type; + u8 pc_interrupt; + u32 interrupt; + u8 baud_rate; + u8 parity; + u8 stop_bits; + u8 flow_control; + u8 terminal_type; + u8 reserved1; + u16 pci_device_id; + u16 pci_vendor_id; + u8 pci_bus; + u8 pci_device; + u8 pci_function; + u32 pci_flags; + u8 pci_segment; + u32 reserved2; +}; + +/******************************************************************************* + * + * SPMI - Server Platform Management Interface table + * + 
******************************************************************************/ + +struct acpi_table_spmi { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved; + u8 interface_type; + u16 spec_revision; /* Version of IPMI */ + u8 interrupt_type; + u8 gpe_number; /* GPE assigned */ + u8 reserved1; + u8 pci_device_flag; + u32 interrupt; + struct acpi_generic_address ipmi_register; + u8 pci_segment; + u8 pci_bus; + u8 pci_device; + u8 pci_function; +}; + +/******************************************************************************* + * + * TCPA - Trusted Computing Platform Alliance table + * + ******************************************************************************/ + +struct acpi_table_tcpa { + struct acpi_table_header header; /* Common ACPI table header */ + u16 reserved; + u32 max_log_length; /* Maximum length for the event log area */ + u64 log_address; /* Address of the event log area */ +}; + +/******************************************************************************* + * + * UEFI - UEFI Boot optimization Table + * + ******************************************************************************/ + +struct acpi_table_uefi { + struct acpi_table_header header; /* Common ACPI table header */ + u8 identifier[16]; /* UUID identifier */ + u16 data_offset; /* Offset of remaining data in table */ + u8 data; +}; + +/******************************************************************************* + * + * WDAT - Watchdog Action Table + * + ******************************************************************************/ + +struct acpi_table_wdat { + struct acpi_table_header header; /* Common ACPI table header */ + u32 header_length; /* Watchdog Header Length */ + u16 pci_segment; /* PCI Segment number */ + u8 pci_bus; /* PCI Bus number */ + u8 pci_device; /* PCI Device number */ + u8 pci_function; /* PCI Function number */ + u8 reserved[3]; + u32 timer_period; /* Period of one timer count (msec) */ + u32 max_count; /* Maximum counter value supported */ + u32 min_count; /* Minimum counter value */ + u8 flags; + u8 reserved2[3]; + u32 entries; /* Number of watchdog entries that follow */ +}; + +/* WDAT Instruction Entries (actions) */ + +struct acpi_wdat_entry { + struct acpi_whea_header whea_header; /* Common header for WHEA tables */ +}; + +/* Values for Action field above */ + +enum acpi_wdat_actions { + ACPI_WDAT_RESET = 1, + ACPI_WDAT_GET_CURRENT_COUNTDOWN = 4, + ACPI_WDAT_GET_COUNTDOWN = 5, + ACPI_WDAT_SET_COUNTDOWN = 6, + ACPI_WDAT_GET_RUNNING_STATE = 8, + ACPI_WDAT_SET_RUNNING_STATE = 9, + ACPI_WDAT_GET_STOPPED_STATE = 10, + ACPI_WDAT_SET_STOPPED_STATE = 11, + ACPI_WDAT_GET_REBOOT = 16, + ACPI_WDAT_SET_REBOOT = 17, + ACPI_WDAT_GET_SHUTDOWN = 18, + ACPI_WDAT_SET_SHUTDOWN = 19, + ACPI_WDAT_GET_STATUS = 32, + ACPI_WDAT_SET_STATUS = 33, + ACPI_WDAT_ACTION_RESERVED = 34 /* 34 and greater are reserved */ +}; + +/* Values for Instruction field above */ + +enum acpi_wdat_instructions { + ACPI_WDAT_READ_VALUE = 0, + ACPI_WDAT_READ_COUNTDOWN = 1, + ACPI_WDAT_WRITE_VALUE = 2, + ACPI_WDAT_WRITE_COUNTDOWN = 3, + ACPI_WDAT_INSTRUCTION_RESERVED = 4, /* 4 and greater are reserved */ + ACPI_WDAT_PRESERVE_REGISTER = 0x80 /* Except for this value */ +}; + +/******************************************************************************* + * + * WDRT - Watchdog Resource Table + * + ******************************************************************************/ + +struct acpi_table_wdrt { + struct acpi_table_header header; /* Common ACPI table header */ + u32 header_length; 
/* Watchdog Header Length */ + u8 pci_segment; /* PCI Segment number */ + u8 pci_bus; /* PCI Bus number */ + u8 pci_device; /* PCI Device number */ + u8 pci_function; /* PCI Function number */ + u32 timer_period; /* Period of one timer count (msec) */ + u32 max_count; /* Maximum counter value supported */ + u32 min_count; /* Minimum counter value */ + u8 flags; + u8 reserved[3]; + u32 entries; /* Number of watchdog entries that follow */ +}; + +/* Flags */ + +#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ + +/* Reset to default packing */ + +#pragma pack() + +#endif /* __ACTBL2_H__ */ -- cgit v1.2.3 From 6e2d5ebd0d36199920676fdceaff4f4bfe66297b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 27 Jul 2009 10:53:00 +0800 Subject: ACPICA: ACPI 4: Update headers for new and changed ACPI tables. Add IVRS,MSCT,UEFI,WAET,WDAT. Updated several existing tables for ACPI 4.0-related changes. Added document references for all tables not defined in ACPI spec. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 30 +++-- include/acpi/actbl1.h | 339 +++++++++++++++++++++++++++++++++----------------- include/acpi/actbl2.h | 339 +++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 557 insertions(+), 151 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 55fcfc6725b2..1b6587952604 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -58,8 +58,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. 
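+ *
+ * (For example, a header can be matched with a four-byte compare against
+ * one of these defines -- e.g. ACPI_COMPARE_NAME(table->signature,
+ * ACPI_SIG_FADT) within ACPICA, or a plain memcmp() -- instead of an
+ * open-coded "FACP" string.)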
*/ #define ACPI_SIG_DSDT "DSDT" /* Differentiated System Description Table */ #define ACPI_SIG_FADT "FACP" /* Fixed ACPI Description Table */ @@ -123,6 +124,7 @@ struct acpi_generic_address { /******************************************************************************* * * RSDP - Root System Description Pointer (Signature is "RSD PTR ") + * Version 2 * ******************************************************************************/ @@ -143,6 +145,7 @@ struct acpi_table_rsdp { /******************************************************************************* * * RSDT/XSDT - Root System Description Tables + * Version 1 (both) * ******************************************************************************/ @@ -176,23 +179,24 @@ struct acpi_table_facs { u8 reserved1[24]; /* Reserved, must be zero */ }; -/* global_lock flags */ +/* Masks for global_lock flag field above */ #define ACPI_GLOCK_PENDING (1) /* 00: Pending global lock ownership */ #define ACPI_GLOCK_OWNED (1<<1) /* 01: Global lock is owned */ -/* Flags */ +/* Masks for Flags field above */ #define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ #define ACPI_FACS_64BIT_WAKE (1<<1) /* 01: 64-bit wake vector supported (ACPI 4.0) */ -/* ospm_flags */ +/* Masks for ospm_flags field above */ #define ACPI_FACS_64BIT_ENVIRONMENT (1) /* 00: 64-bit wake environment is required (ACPI 4.0) */ /******************************************************************************* * * FADT - Fixed ACPI Description Table (Signature "FACP") + * Version 4 * ******************************************************************************/ @@ -253,7 +257,7 @@ struct acpi_table_fadt { struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */ }; -/* FADT Boot Architecture Flags (boot_flags) */ +/* Masks for FADT Boot Architecture Flags (boot_flags) */ #define ACPI_FADT_LEGACY_DEVICES (1) /* 00: [V2] System has LPC or ISA bus devices */ #define ACPI_FADT_8042 (1<<1) /* 01: [V3] System has an 8042 controller on port 60/64 */ @@ -263,7 +267,7 @@ struct acpi_table_fadt { #define FADT2_REVISION_ID 3 -/* FADT flags */ +/* Masks for FADT flags */ #define ACPI_FADT_WBINVD (1) /* 00: [V1] The wbinvd instruction works properly */ #define ACPI_FADT_WBINVD_FLUSH (1<<1) /* 01: [V1] wbinvd flushes but does not invalidate caches */ @@ -286,7 +290,7 @@ struct acpi_table_fadt { #define ACPI_FADT_APIC_CLUSTER (1<<18) /* 18: [V4] All local APICs must use cluster model (ACPI 3.0) */ #define ACPI_FADT_APIC_PHYSICAL (1<<19) /* 19: [V4] All local x_aPICs must use physical dest mode (ACPI 3.0) */ -/* FADT Prefered Power Management Profiles */ +/* Values for preferred_profile (Prefered Power Management Profiles) */ enum acpi_prefered_pm_profiles { PM_UNSPECIFIED = 0, @@ -304,14 +308,16 @@ enum acpi_prefered_pm_profiles { #define ACPI_FADT_OFFSET(f) (u8) ACPI_OFFSET (struct acpi_table_fadt, f) +/* + * Internal table-related structures + */ union acpi_name_union { u32 integer; char ascii[4]; }; -/* - * Internal ACPI Table Descriptor. One per ACPI table - */ +/* Internal ACPI Table Descriptor. One per ACPI table. 
*/ + struct acpi_table_desc { acpi_physical_address address; struct acpi_table_header *pointer; @@ -321,7 +327,7 @@ struct acpi_table_desc { u8 flags; }; -/* Flags for above */ +/* Masks for Flags field above */ #define ACPI_TABLE_ORIGIN_UNKNOWN (0) #define ACPI_TABLE_ORIGIN_MAPPED (1) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 582af1fcb8f5..0417f2abc44b 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -56,8 +56,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. */ #define ACPI_SIG_BERT "BERT" /* Boot Error Record Table */ #define ACPI_SIG_CPEP "CPEP" /* Corrected Platform Error Polling table */ @@ -66,6 +67,7 @@ #define ACPI_SIG_ERST "ERST" /* Error Record Serialization Table */ #define ACPI_SIG_HEST "HEST" /* Hardware Error Source Table */ #define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */ +#define ACPI_SIG_MSCT "MSCT" /* Maximum System Characteristics Table */ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ #define ACPI_SIG_SLIT "SLIT" /* System Locality Distance Information Table */ #define ACPI_SIG_SRAT "SRAT" /* System Resource Affinity Table */ @@ -82,14 +84,20 @@ * portable, so do not use any other bitfield types. */ -/* Common Subtable header (used in MADT, SRAT, etc.) */ +/******************************************************************************* + * + * Common subtable headers + * + ******************************************************************************/ + +/* Generic subtable header (used in MADT, SRAT, etc.) 
*/ struct acpi_subtable_header { u8 type; u8 length; }; -/* Common Subtable header for WHEA tables (EINJ, ERST, WDAT) */ +/* Subtable header for WHEA tables (EINJ, ERST, WDAT) */ struct acpi_whea_header { u8 action; @@ -103,7 +111,8 @@ struct acpi_whea_header { /******************************************************************************* * - * BERT - Boot Error Record Table + * BERT - Boot Error Record Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -113,26 +122,43 @@ struct acpi_table_bert { u64 address; /* Physical addresss of the error region */ }; -/* Boot Error Region */ +/* Boot Error Region (not a subtable, pointed to by Address field above) */ struct acpi_bert_region { - u32 block_status; - u32 raw_data_offset; - u32 raw_data_length; - u32 data_length; - u32 error_severity; + u32 block_status; /* Type of error information */ + u32 raw_data_offset; /* Offset to raw error data */ + u32 raw_data_length; /* Length of raw error data */ + u32 data_length; /* Length of generic error data */ + u32 error_severity; /* Severity code */ }; -/* block_status Flags */ +/* Values for block_status flags above */ #define ACPI_BERT_UNCORRECTABLE (1) -#define ACPI_BERT_CORRECTABLE (2) -#define ACPI_BERT_MULTIPLE_UNCORRECTABLE (4) -#define ACPI_BERT_MULTIPLE_CORRECTABLE (8) +#define ACPI_BERT_CORRECTABLE (1<<1) +#define ACPI_BERT_MULTIPLE_UNCORRECTABLE (1<<2) +#define ACPI_BERT_MULTIPLE_CORRECTABLE (1<<3) +#define ACPI_BERT_ERROR_ENTRY_COUNT (0xFF<<4) /* 8 bits, error count */ + +/* Values for error_severity above */ + +enum acpi_bert_error_severity { + ACPI_BERT_ERROR_CORRECTABLE = 0, + ACPI_BERT_ERROR_FATAL = 1, + ACPI_BERT_ERROR_CORRECTED = 2, + ACPI_BERT_ERROR_NONE = 3, + ACPI_BERT_ERROR_RESERVED = 4 /* 4 and greater are reserved */ +}; + +/* + * Note: The generic error data that follows the error_severity field above + * uses the struct acpi_hest_generic_data defined under the HEST table below + */ /******************************************************************************* * - * CPEP - Corrected Platform Error Polling table + * CPEP - Corrected Platform Error Polling table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -144,8 +170,7 @@ struct acpi_table_cpep { /* Subtable */ struct acpi_cpep_polling { - u8 type; - u8 length; + struct acpi_subtable_header header; u8 id; /* Processor ID */ u8 eid; /* Processor EID */ u32 interval; /* Polling interval (msec) */ @@ -154,6 +179,7 @@ struct acpi_cpep_polling { /******************************************************************************* * * ECDT - Embedded Controller Boot Resources Table + * Version 1 * ******************************************************************************/ @@ -168,14 +194,16 @@ struct acpi_table_ecdt { /******************************************************************************* * - * EINJ - Error Injection Table + * EINJ - Error Injection Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ struct acpi_table_einj { struct acpi_table_header header; /* Common ACPI table header */ u32 header_length; - u32 reserved; + u8 flags; + u8 reserved[3]; u32 entries; }; @@ -185,6 +213,10 @@ struct acpi_einj_entry { struct acpi_whea_header whea_header; /* Common header for WHEA tables */ }; +/* Masks for Flags field above */ + +#define ACPI_EINJ_PRESERVE (1) + /* Values for Action field above */ enum acpi_einj_actions { @@ -220,9 +252,34 
@@ struct acpi_einj_trigger { u32 entry_count; }; +/* Command status return values */ + +enum acpi_einj_command_status { + ACPI_EINJ_SUCCESS = 0, + ACPI_EINJ_FAILURE = 1, + ACPI_EINJ_INVALID_ACCESS = 2, + ACPI_EINJ_STATUS_RESERVED = 3 /* 3 and greater are reserved */ +}; + +/* Error types returned from ACPI_EINJ_GET_ERROR_TYPE (bitfield) */ + +#define ACPI_EINJ_PROCESSOR_CORRECTABLE (1) +#define ACPI_EINJ_PROCESSOR_UNCORRECTABLE (1<<1) +#define ACPI_EINJ_PROCESSOR_FATAL (1<<2) +#define ACPI_EINJ_MEMORY_CORRECTABLE (1<<3) +#define ACPI_EINJ_MEMORY_UNCORRECTABLE (1<<4) +#define ACPI_EINJ_MEMORY_FATAL (1<<5) +#define ACPI_EINJ_PCIX_CORRECTABLE (1<<6) +#define ACPI_EINJ_PCIX_UNCORRECTABLE (1<<7) +#define ACPI_EINJ_PCIX_FATAL (1<<8) +#define ACPI_EINJ_PLATFORM_CORRECTABLE (1<<9) +#define ACPI_EINJ_PLATFORM_UNCORRECTABLE (1<<10) +#define ACPI_EINJ_PLATFORM_FATAL (1<<11) + /******************************************************************************* * - * ERST - Error Record Serialization Table + * ERST - Error Record Serialization Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -239,19 +296,23 @@ struct acpi_erst_entry { struct acpi_whea_header whea_header; /* Common header for WHEA tables */ }; +/* Masks for Flags field above */ + +#define ACPI_ERST_PRESERVE (1) + /* Values for Action field above */ enum acpi_erst_actions { - ACPI_ERST_BEGIN_WRITE_OPERATION = 0, - ACPI_ERST_BEGIN_READ_OPERATION = 1, - ACPI_ERST_BETGIN_CLEAR_OPERATION = 2, - ACPI_ERST_END_OPERATION = 3, + ACPI_ERST_BEGIN_WRITE = 0, + ACPI_ERST_BEGIN_READ = 1, + ACPI_ERST_BEGIN_CLEAR = 2, + ACPI_ERST_END = 3, ACPI_ERST_SET_RECORD_OFFSET = 4, ACPI_ERST_EXECUTE_OPERATION = 5, ACPI_ERST_CHECK_BUSY_STATUS = 6, ACPI_ERST_GET_COMMAND_STATUS = 7, - ACPI_ERST_GET_RECORD_IDENTIFIER = 8, - ACPI_ERST_SET_RECORD_IDENTIFIER = 9, + ACPI_ERST_GET_RECORD_ID = 8, + ACPI_ERST_SET_RECORD_ID = 9, ACPI_ERST_GET_RECORD_COUNT = 10, ACPI_ERST_BEGIN_DUMMY_WRIITE = 11, ACPI_ERST_NOT_USED = 12, @@ -286,9 +347,29 @@ enum acpi_erst_instructions { ACPI_ERST_INSTRUCTION_RESERVED = 19 /* 19 and greater are reserved */ }; +/* Command status return values */ + +enum acpi_erst_command_status { + ACPI_ERST_SUCESS = 0, + ACPI_ERST_NO_SPACE = 1, + ACPI_ERST_NOT_AVAILABLE = 2, + ACPI_ERST_FAILURE = 3, + ACPI_ERST_RECORD_EMPTY = 4, + ACPI_ERST_NOT_FOUND = 5, + ACPI_ERST_STATUS_RESERVED = 6 /* 6 and greater are reserved */ +}; + +/* Error Record Serialization Information */ + +struct acpi_erst_info { + u16 signature; /* Should be "ER" */ + u8 data[48]; +}; + /******************************************************************************* * - * HEST - Hardware Error Source Table + * HEST - Hardware Error Source Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -301,70 +382,49 @@ struct acpi_table_hest { struct acpi_hest_header { u16 type; + u16 source_id; }; /* Values for Type field above for subtables */ enum acpi_hest_types { - ACPI_HEST_TYPE_XPF_MACHINE_CHECK = 0, - ACPI_HEST_TYPE_XPF_CORRECTED_MACHINE_CHECK = 1, - ACPI_HEST_TYPE_XPF_UNUSED = 2, - ACPI_HEST_TYPE_XPF_NON_MASKABLE_INTERRUPT = 3, - ACPI_HEST_TYPE_IPF_CORRECTED_MACHINE_CHECK = 4, - ACPI_HEST_TYPE_IPF_CORRECTED_PLATFORM_ERROR = 5, + ACPI_HEST_TYPE_IA32_CHECK = 0, + ACPI_HEST_TYPE_IA32_CORRECTED_CHECK = 1, + ACPI_HEST_TYPE_IA32_NMI = 2, + ACPI_HEST_TYPE_NOT_USED3 = 3, + ACPI_HEST_TYPE_NOT_USED4 = 4, + ACPI_HEST_TYPE_NOT_USED5 = 5, ACPI_HEST_TYPE_AER_ROOT_PORT = 6, 
ACPI_HEST_TYPE_AER_ENDPOINT = 7, ACPI_HEST_TYPE_AER_BRIDGE = 8, - ACPI_HEST_TYPE_GENERIC_HARDWARE_ERROR_SOURCE = 9, + ACPI_HEST_TYPE_GENERIC_ERROR = 9, ACPI_HEST_TYPE_RESERVED = 10 /* 10 and greater are reserved */ }; /* - * HEST Sub-subtables + * HEST substructures contained in subtables */ -/* XPF Machine Check Error Bank */ - -struct acpi_hest_xpf_error_bank { +/* + * IA32 Error Bank(s) - Follows the struct acpi_hest_ia_machine_check and + * struct acpi_hest_ia_corrected structures. + */ +struct acpi_hest_ia_error_bank { u8 bank_number; u8 clear_status_on_init; u8 status_format; - u8 config_write_enable; + u8 reserved; u32 control_register; - u64 control_init_data; + u64 control_data; u32 status_register; u32 address_register; u32 misc_register; }; -/* Generic Error Status */ - -struct acpi_hest_generic_status { - u32 block_status; - u32 raw_data_offset; - u32 raw_data_length; - u32 data_length; - u32 error_severity; -}; - -/* Generic Error Data */ - -struct acpi_hest_generic_data { - u8 section_type[16]; - u32 error_severity; - u16 revision; - u8 validation_bits; - u8 flags; - u32 error_data_length; - u8 fru_id[16]; - u8 fru_text[20]; -}; - -/* Common HEST structure for PCI/AER types below (6,7,8) */ +/* Common HEST sub-structure for PCI/AER structures below (6,7,8) */ struct acpi_hest_aer_common { - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; u8 enabled; u32 records_to_pre_allocate; @@ -373,13 +433,18 @@ struct acpi_hest_aer_common { u16 device; u16 function; u16 device_control; - u16 reserved; + u16 reserved2; u32 uncorrectable_error_mask; u32 uncorrectable_error_severity; u32 correctable_error_mask; u32 advanced_error_capabilities; }; +/* Masks for HEST Flags fields */ + +#define ACPI_HEST_FIRMWARE_FIRST (1) +#define ACPI_HEST_GLOBAL (1<<1) + /* Hardware Error Notification */ struct acpi_hest_notify { @@ -405,71 +470,59 @@ enum acpi_hest_notify_types { ACPI_HEST_NOTIFY_RESERVED = 5 /* 5 and greater are reserved */ }; +/* Values for config_write_enable bitfield above */ + +#define ACPI_HEST_TYPE (1) +#define ACPI_HEST_POLL_INTERVAL (1<<1) +#define ACPI_HEST_POLL_THRESHOLD_VALUE (1<<2) +#define ACPI_HEST_POLL_THRESHOLD_WINDOW (1<<3) +#define ACPI_HEST_ERR_THRESHOLD_VALUE (1<<4) +#define ACPI_HEST_ERR_THRESHOLD_WINDOW (1<<5) + /* * HEST subtables - * - * From WHEA Design Document, 16 May 2007. - * Note: There is no subtable type 2 in this version of the document, - * and there are two different subtable type 3s. 
*/ - /* 0: XPF Machine Check Exception */ +/* 0: IA32 Machine Check Exception */ -struct acpi_hest_xpf_machine_check { +struct acpi_hest_ia_machine_check { struct acpi_hest_header header; - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; - u8 reserved1; + u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; u64 global_capability_data; u64 global_control_data; u8 num_hardware_banks; - u8 reserved2[7]; + u8 reserved3[7]; }; -/* 1: XPF Corrected Machine Check */ +/* 1: IA32 Corrected Machine Check */ -struct acpi_table_hest_xpf_corrected { +struct acpi_table_hest_ia_corrected { struct acpi_hest_header header; - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; struct acpi_hest_notify notify; u8 num_hardware_banks; - u8 reserved[3]; + u8 reserved2[3]; }; -/* 3: XPF Non-Maskable Interrupt */ +/* 2: IA32 Non-Maskable Interrupt */ -struct acpi_hest_xpf_nmi { +struct acpi_hest_ia_nmi { struct acpi_hest_header header; - u16 source_id; u32 reserved; u32 records_to_pre_allocate; u32 max_sections_per_record; u32 max_raw_data_length; }; -/* 4: IPF Corrected Machine Check */ - -struct acpi_hest_ipf_corrected { - struct acpi_hest_header header; - u8 enabled; - u8 reserved; -}; - -/* 5: IPF Corrected Platform Error */ - -struct acpi_hest_ipf_corrected_platform { - struct acpi_hest_header header; - u8 enabled; - u8 reserved; -}; +/* 3,4,5: Not used */ /* 6: PCI Express Root Port AER */ @@ -491,30 +544,61 @@ struct acpi_hest_aer { struct acpi_hest_aer_bridge { struct acpi_hest_header header; struct acpi_hest_aer_common aer; - u32 secondary_uncorrectable_error_mask; - u32 secondary_uncorrectable_error_severity; - u32 secondary_advanced_capabilities; + u32 second_uncorrectable_error_mask; + u32 second_uncorrectable_error_severity; + u32 second_advanced_capabilities; }; /* 9: Generic Hardware Error Source */ struct acpi_hest_generic { struct acpi_hest_header header; - u16 source_id; u16 related_source_id; - u8 config_write_enable; + u8 reserved; u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; u32 max_raw_data_length; struct acpi_generic_address error_status_address; struct acpi_hest_notify notify; - u32 error_status_block_length; + u32 error_block_length; +}; + +/* Generic Error Status block */ + +struct acpi_hest_generic_status { + u32 block_status; + u32 raw_data_offset; + u32 raw_data_length; + u32 data_length; + u32 error_severity; +}; + +/* Values for block_status flags above */ + +#define ACPI_HEST_UNCORRECTABLE (1) +#define ACPI_HEST_CORRECTABLE (1<<1) +#define ACPI_HEST_MULTIPLE_UNCORRECTABLE (1<<2) +#define ACPI_HEST_MULTIPLE_CORRECTABLE (1<<3) +#define ACPI_HEST_ERROR_ENTRY_COUNT (0xFF<<4) /* 8 bits, error count */ + +/* Generic Error Data entry */ + +struct acpi_hest_generic_data { + u8 section_type[16]; + u32 error_severity; + u16 revision; + u8 validation_bits; + u8 flags; + u32 error_data_length; + u8 fru_id[16]; + u8 fru_text[20]; }; /******************************************************************************* * * MADT - Multiple APIC Description Table + * Version 3 * ******************************************************************************/ @@ -524,16 +608,16 @@ struct acpi_table_madt { u32 flags; }; -/* Flags */ +/* Masks for Flags field above */ -#define ACPI_MADT_PCAT_COMPAT (1) /* 00: System also has dual 8259s */ +#define ACPI_MADT_PCAT_COMPAT (1) /* 00: System also has dual 8259s */ /* Values for PCATCompat flag */ #define 
ACPI_MADT_DUAL_PIC 0 #define ACPI_MADT_MULTIPLE_APIC 1 -/* Values for subtable type in struct acpi_subtable_header */ +/* Values for MADT subtable type in struct acpi_subtable_header */ enum acpi_madt_type { ACPI_MADT_TYPE_LOCAL_APIC = 0, @@ -644,7 +728,7 @@ struct acpi_madt_interrupt_source { u32 flags; /* Interrupt Source Flags */ }; -/* Flags field above */ +/* Masks for Flags field above */ #define ACPI_MADT_CPEI_OVERRIDE (1) @@ -693,9 +777,36 @@ struct acpi_madt_local_x2apic_nmi { #define ACPI_MADT_TRIGGER_RESERVED (2<<2) #define ACPI_MADT_TRIGGER_LEVEL (3<<2) +/******************************************************************************* + * + * MSCT - Maximum System Characteristics Table (ACPI 4.0) + * Version 1 + * + ******************************************************************************/ + +struct acpi_table_msct { + struct acpi_table_header header; /* Common ACPI table header */ + u32 proximity_offset; /* Location of proximity info struct(s) */ + u32 max_proximity_domains; /* Max number of proximity domains */ + u32 max_clock_domains; /* Max number of clock domains */ + u64 max_address; /* Max physical address in system */ +}; + +/* Subtable - Maximum Proximity Domain Information. Version 1 */ + +struct acpi_msct_proximity { + u8 revision; + u8 length; + u32 range_start; /* Start of domain range */ + u32 range_end; /* End of domain range */ + u32 processor_capacity; + u64 memory_capacity; /* In bytes */ +}; + /******************************************************************************* * * SBST - Smart Battery Specification Table + * Version 1 * ******************************************************************************/ @@ -709,6 +820,7 @@ struct acpi_table_sbst { /******************************************************************************* * * SLIT - System Locality Distance Information Table + * Version 1 * ******************************************************************************/ @@ -721,6 +833,7 @@ struct acpi_table_slit { /******************************************************************************* * * SRAT - System Resource Affinity Table + * Version 3 * ******************************************************************************/ @@ -755,6 +868,10 @@ struct acpi_srat_cpu_affinity { u32 reserved; /* Reserved, must be zero */ }; +/* Flags */ + +#define ACPI_SRAT_CPU_USE_AFFINITY (1) /* 00: Use affinity structure */ + /* 1: Memory Affinity */ struct acpi_srat_mem_affinity { diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index b271aba0e524..6f3dce9991e1 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -14,8 +14,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. */ #define ACPI_SIG_ASF "ASF!" 
/* Alert Standard Format table */ #define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ @@ -23,12 +24,14 @@ #define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ #define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ #define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ +#define ACPI_SIG_IVRS "IVRS" /* I/O Virtualization Reporting Structure */ #define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ #define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ #define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ #define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ #define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ +#define ACPI_SIG_WAET "WAET" /* Windows ACPI Emulated devices Table */ #define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ #define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ @@ -47,6 +50,7 @@ /******************************************************************************* * * ASF - Alert Standard Format table (Signature "ASF!") + * Revision 0x10 * * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 * @@ -91,6 +95,10 @@ struct acpi_asf_info { u8 reserved2[3]; }; +/* Masks for Flags field above */ + +#define ACPI_ASF_SMBUS_PROTOCOLS (1) + /* 1: ASF Alerts */ struct acpi_asf_alert { @@ -156,6 +164,9 @@ struct acpi_asf_address { /******************************************************************************* * * BOOT - Simple Boot Flag Table + * Version 1 + * + * Conforms to the "Simple Boot Flag Specification", Version 2.1 * ******************************************************************************/ @@ -168,6 +179,9 @@ struct acpi_table_boot { /******************************************************************************* * * DBGP - Debug Port table + * Version 1 + * + * Conforms to the "Debug Port Specification", Version 1.00, 2/9/2000 * ******************************************************************************/ @@ -181,7 +195,10 @@ struct acpi_table_dbgp { /******************************************************************************* * * DMAR - DMA Remapping table - * From "Intel Virtualization Technology for Directed I/O", Sept. 2007 + * Version 1 + * + * Conforms to "Intel Virtualization Technology for Directed I/O", + * Version 1.2, Sept. 
2008 * ******************************************************************************/ @@ -192,7 +209,7 @@ struct acpi_table_dmar { u8 reserved[10]; }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_INTR_REMAP (1) @@ -209,9 +226,12 @@ enum acpi_dmar_type { ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, ACPI_DMAR_TYPE_ATSR = 2, - ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ + ACPI_DMAR_HARDWARE_AFFINITY = 3, + ACPI_DMAR_TYPE_RESERVED = 4 /* 4 and greater are reserved */ }; +/* DMAR Device Scope structure */ + struct acpi_dmar_device_scope { u8 entry_type; u8 length; @@ -250,7 +270,7 @@ struct acpi_dmar_hardware_unit { u64 address; /* Register Base Address */ }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_INCLUDE_ALL (1) @@ -264,7 +284,7 @@ struct acpi_dmar_reserved_memory { u64 end_address; /* 4_k aligned limit address */ }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_ALLOW_ALL (1) @@ -277,13 +297,26 @@ struct acpi_dmar_atsr { u16 segment; }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_ALL_PORTS (1) +/* 3: Remapping Hardware Static Affinity Structure */ + +struct acpi_dmar_rhsa { + struct acpi_dmar_header header; + u32 reserved; + u64 base_address; + u32 proximity_domain; +}; + /******************************************************************************* * * HPET - High Precision Event Timer table + * Version 1 + * + * Conforms to "IA-PC HPET (High Precision Event Timers) Specification", + * Version 1.0a, October 2004 * ******************************************************************************/ @@ -296,17 +329,28 @@ struct acpi_table_hpet { u8 flags; }; -/*! Flags */ +/* Masks for Flags field above */ -#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ -#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ -#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ +#define ACPI_HPET_PAGE_PROTECT_MASK (3) -/*! [End] no source code translation !*/ +/* Values for Page Protect flags */ + +enum acpi_hpet_page_protect { + ACPI_HPET_NO_PAGE_PROTECT = 0, + ACPI_HPET_PAGE_PROTECT4 = 1, + ACPI_HPET_PAGE_PROTECT64 = 2 +}; /******************************************************************************* * * IBFT - Boot Firmware Table + * Version 1 + * + * Conforms to "iSCSI Boot Firmware Table (iBFT) as Defined in ACPI 3.0b + * Specification", Version 1.01, March 1, 2007 + * + * Note: It appears that this table is not intended to appear in the RSDT/XSDT. + * Therefore, it is not currently supported by the disassembler. * ******************************************************************************/ @@ -394,9 +438,184 @@ struct acpi_ibft_target { u16 reverse_chap_secret_offset; }; +/******************************************************************************* + * + * IVRS - I/O Virtualization Reporting Structure + * Version 1 + * + * Conforms to "AMD I/O Virtualization Technology (IOMMU) Specification", + * Revision 1.26, February 2009. 
+ * + ******************************************************************************/ + +struct acpi_table_ivrs { + struct acpi_table_header header; /* Common ACPI table header */ + u32 info; /* Common virtualization info */ + u64 reserved; +}; + +/* Values for Info field above */ + +#define ACPI_IVRS_PHYSICAL_SIZE 0x00007F00 /* 7 bits, physical address size */ +#define ACPI_IVRS_VIRTUAL_SIZE 0x003F8000 /* 7 bits, virtual address size */ +#define ACPI_IVRS_ATS_RESERVED 0x00400000 /* ATS address translation range reserved */ + +/* IVRS subtable header */ + +struct acpi_ivrs_header { + u8 type; /* Subtable type */ + u8 flags; + u16 length; /* Subtable length */ + u16 device_id; /* ID of IOMMU */ +}; + +/* Values for subtable Type above */ + +enum acpi_ivrs_type { + ACPI_IVRS_TYPE_HARDWARE = 0x10, + ACPI_IVRS_TYPE_MEMORY1 = 0x20, + ACPI_IVRS_TYPE_MEMORY2 = 0x21, + ACPI_IVRS_TYPE_MEMORY3 = 0x22 +}; + +/* Masks for Flags field above for IVHD subtable */ + +#define ACPI_IVHD_TT_ENABLE (1) +#define ACPI_IVHD_PASS_PW (1<<1) +#define ACPI_IVHD_RES_PASS_PW (1<<2) +#define ACPI_IVHD_ISOC (1<<3) +#define ACPI_IVHD_IOTLB (1<<4) + +/* Masks for Flags field above for IVMD subtable */ + +#define ACPI_IVMD_UNITY (1) +#define ACPI_IVMD_READ (1<<1) +#define ACPI_IVMD_WRITE (1<<2) +#define ACPI_IVMD_EXCLUSION_RANGE (1<<3) + +/* + * IVRS subtables, correspond to Type in struct acpi_ivrs_header + */ + +/* 0x10: I/O Virtualization Hardware Definition Block (IVHD) */ + +struct acpi_ivrs_hardware { + struct acpi_ivrs_header header; + u16 capability_offset; /* Offset for IOMMU control fields */ + u64 base_address; /* IOMMU control registers */ + u16 pci_segment_group; + u16 info; /* MSI number and unit ID */ + u32 reserved; +}; + +/* Masks for Info field above */ + +#define ACPI_IVHD_MSI_NUMBER_MASK 0x001F /* 5 bits, MSI message number */ +#define ACPI_IVHD_UNIT_ID_MASK 0x1F00 /* 5 bits, unit_iD */ + +/* + * Device Entries for IVHD subtable, appear after struct acpi_ivrs_hardware structure. + * Upper two bits of the Type field are the (encoded) length of the structure. + * Currently, only 4 and 8 byte entries are defined. 16 and 32 byte entries + * are reserved for future use but not defined. 
+ */ +struct acpi_ivrs_de_header { + u8 type; + u16 id; + u8 data_setting; +}; + +/* Length of device entry is in the top two bits of Type field above */ + +#define ACPI_IVHD_ENTRY_LENGTH 0xC0 + +/* Values for device entry Type field above */ + +enum acpi_ivrs_device_entry_type { + /* 4-byte device entries, all use struct acpi_ivrs_device4 */ + + ACPI_IVRS_TYPE_PAD4 = 0, + ACPI_IVRS_TYPE_ALL = 1, + ACPI_IVRS_TYPE_SELECT = 2, + ACPI_IVRS_TYPE_START = 3, + ACPI_IVRS_TYPE_END = 4, + + /* 8-byte device entries */ + + ACPI_IVRS_TYPE_PAD8 = 64, + ACPI_IVRS_TYPE_NOT_USED = 65, + ACPI_IVRS_TYPE_ALIAS_SELECT = 66, /* Uses struct acpi_ivrs_device8a */ + ACPI_IVRS_TYPE_ALIAS_START = 67, /* Uses struct acpi_ivrs_device8a */ + ACPI_IVRS_TYPE_EXT_SELECT = 70, /* Uses struct acpi_ivrs_device8b */ + ACPI_IVRS_TYPE_EXT_START = 71, /* Uses struct acpi_ivrs_device8b */ + ACPI_IVRS_TYPE_SPECIAL = 72 /* Uses struct acpi_ivrs_device8c */ +}; + +/* Values for Data field above */ + +#define ACPI_IVHD_INIT_PASS (1) +#define ACPI_IVHD_EINT_PASS (1<<1) +#define ACPI_IVHD_NMI_PASS (1<<2) +#define ACPI_IVHD_SYSTEM_MGMT (3<<4) +#define ACPI_IVHD_LINT0_PASS (1<<6) +#define ACPI_IVHD_LINT1_PASS (1<<7) + +/* Types 0-4: 4-byte device entry */ + +struct acpi_ivrs_device4 { + struct acpi_ivrs_de_header header; +}; + +/* Types 66-67: 8-byte device entry */ + +struct acpi_ivrs_device8a { + struct acpi_ivrs_de_header header; + u8 reserved1; + u16 used_id; + u8 reserved2; +}; + +/* Types 70-71: 8-byte device entry */ + +struct acpi_ivrs_device8b { + struct acpi_ivrs_de_header header; + u32 extended_data; +}; + +/* Values for extended_data above */ + +#define ACPI_IVHD_ATS_DISABLED (1<<31) + +/* Type 72: 8-byte device entry */ + +struct acpi_ivrs_device8c { + struct acpi_ivrs_de_header header; + u8 handle; + u16 used_id; + u8 variety; +}; + +/* Values for Variety field above */ + +#define ACPI_IVHD_IOAPIC 1 +#define ACPI_IVHD_HPET 2 + +/* 0x20, 0x21, 0x22: I/O Virtualization Memory Definition Block (IVMD) */ + +struct acpi_ivrs_memory { + struct acpi_ivrs_header header; + u16 aux_data; + u64 reserved; + u64 start_address; + u64 memory_length; +}; + /******************************************************************************* * * MCFG - PCI Memory Mapped Configuration table and sub-table + * Version 1 + * + * Conforms to "PCI Firmware Specification", Revision 3.0, June 20, 2005 * ******************************************************************************/ @@ -418,6 +637,10 @@ struct acpi_mcfg_allocation { /******************************************************************************* * * SPCR - Serial Port Console Redirection table + * Version 1 + * + * Conforms to "Serial Port Console Redirection Table", + * Version 1.00, January 11, 2002 * ******************************************************************************/ @@ -445,16 +668,25 @@ struct acpi_table_spcr { u32 reserved2; }; +/* Masks for pci_flags field above */ + +#define ACPI_SPCR_DO_NOT_DISABLE (1) + /******************************************************************************* * * SPMI - Server Platform Management Interface table + * Version 5 + * + * Conforms to "Intelligent Platform Management Interface Specification + * Second Generation v2.0", Document Revision 1.0, February 12, 2004 with + * June 12, 2009 markup. 
* ******************************************************************************/ struct acpi_table_spmi { struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved; u8 interface_type; + u8 reserved; /* Must be 1 */ u16 spec_revision; /* Version of IPMI */ u8 interrupt_type; u8 gpe_number; /* GPE assigned */ @@ -466,11 +698,27 @@ struct acpi_table_spmi { u8 pci_bus; u8 pci_device; u8 pci_function; + u8 reserved2; +}; + +/* Values for interface_type above */ + +enum acpi_spmi_interface_types { + ACPI_SPMI_NOT_USED = 0, + ACPI_SPMI_KEYBOARD = 1, + ACPI_SPMI_SMI = 2, + ACPI_SPMI_BLOCK_TRANSFER = 3, + ACPI_SPMI_SMBUS = 4, + ACPI_SPMI_RESERVED = 5 /* 5 and above are reserved */ }; /******************************************************************************* * * TCPA - Trusted Computing Platform Alliance table + * Version 1 + * + * Conforms to "TCG PC Specific Implementation Specification", + * Version 1.1, August 18, 2003 * ******************************************************************************/ @@ -484,6 +732,10 @@ struct acpi_table_tcpa { /******************************************************************************* * * UEFI - UEFI Boot optimization Table + * Version 1 + * + * Conforms to "Unified Extensible Firmware Interface Specification", + * Version 2.3, May 8, 2009 * ******************************************************************************/ @@ -491,12 +743,34 @@ struct acpi_table_uefi { struct acpi_table_header header; /* Common ACPI table header */ u8 identifier[16]; /* UUID identifier */ u16 data_offset; /* Offset of remaining data in table */ - u8 data; }; +/******************************************************************************* + * + * WAET - Windows ACPI Emulated devices Table + * Version 1 + * + * Conforms to "Windows ACPI Emulated Devices Table", version 1.0, April 6, 2009 + * + ******************************************************************************/ + +struct acpi_table_waet { + struct acpi_table_header header; /* Common ACPI table header */ + u32 flags; +}; + +/* Masks for Flags field above */ + +#define ACPI_WAET_RTC_NO_ACK (1) /* RTC requires no int acknowledge */ +#define ACPI_WAET_TIMER_ONE_READ (1<<1) /* PM timer requires only one read */ + /******************************************************************************* * * WDAT - Watchdog Action Table + * Version 1 + * + * Conforms to "Hardware Watchdog Timers Design Specification", + * Copyright 2006 Microsoft Corporation. 
* ******************************************************************************/ @@ -516,10 +790,20 @@ struct acpi_table_wdat { u32 entries; /* Number of watchdog entries that follow */ }; +/* Masks for Flags field above */ + +#define ACPI_WDAT_ENABLED (1) +#define ACPI_WDAT_STOPPED 0x80 + /* WDAT Instruction Entries (actions) */ struct acpi_wdat_entry { - struct acpi_whea_header whea_header; /* Common header for WHEA tables */ + u8 action; + u8 instruction; + u16 reserved; + struct acpi_generic_address register_region; + u32 value; /* Value used with Read/Write register */ + u32 mask; /* Bitmask required for this register instruction */ }; /* Values for Action field above */ @@ -556,28 +840,27 @@ enum acpi_wdat_instructions { /******************************************************************************* * * WDRT - Watchdog Resource Table + * Version 1 + * + * Conforms to "Watchdog Timer Hardware Requirements for Windows Server 2003", + * Version 1.01, August 28, 2006 * ******************************************************************************/ struct acpi_table_wdrt { struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u8 pci_segment; /* PCI Segment number */ + struct acpi_generic_address control_register; + struct acpi_generic_address count_register; + u16 pci_device_id; + u16 pci_vendor_id; u8 pci_bus; /* PCI Bus number */ u8 pci_device; /* PCI Device number */ u8 pci_function; /* PCI Function number */ - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved[3]; - u32 entries; /* Number of watchdog entries that follow */ + u8 pci_segment; /* PCI Segment number */ + u16 max_count; /* Maximum counter value supported */ + u8 units; }; -/* Flags */ - -#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ - /* Reset to default packing */ #pragma pack() -- cgit v1.2.3 From c276e3884163355464a76e60ed9e272b52b4acc2 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 27 Jul 2009 14:55:02 +0800 Subject: ACPICA: Update definitions for HEST table Eliminate duplicated code in disassembler. Shorten identifiers that were too long. 
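[ For illustration, a minimal sketch of a consumer reading the shortened AER identifiers; the helper, its pr_info() output, and the assumption of an already-mapped table are hypothetical -- only the struct layout and the new field names come from the diff below. ]

#include <linux/kernel.h>
#include <acpi/acpi.h>	/* assumed to pull in the HEST table definitions */

/* Hypothetical helper: dump the renamed mask/severity fields of a
 * HEST PCI Express AER error source. */
static void dump_aer_common(struct acpi_hest_aer_common *aer)
{
	pr_info("AER source %02x:%02x.%x, enabled=%u\n",
		aer->bus, aer->device, aer->function, aer->enabled);
	pr_info("  uncorrectable mask/severity: %#x/%#x\n",
		aer->uncorrectable_mask, aer->uncorrectable_severity);
	pr_info("  correctable mask: %#x, advanced capabilities: %#x\n",
		aer->correctable_mask, aer->advanced_capabilities);
}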
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl1.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 0417f2abc44b..34b10c06bcfd 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -427,17 +427,17 @@ struct acpi_hest_aer_common { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 bus; u16 device; u16 function; u16 device_control; u16 reserved2; - u32 uncorrectable_error_mask; - u32 uncorrectable_error_severity; - u32 correctable_error_mask; - u32 advanced_error_capabilities; + u32 uncorrectable_mask; + u32 uncorrectable_severity; + u32 correctable_mask; + u32 advanced_capabilities; }; /* Masks for HEST Flags fields */ @@ -490,7 +490,7 @@ struct acpi_hest_ia_machine_check { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u64 global_capability_data; u64 global_control_data; @@ -505,7 +505,7 @@ struct acpi_table_hest_ia_corrected { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; struct acpi_hest_notify notify; u8 num_hardware_banks; @@ -517,7 +517,7 @@ struct acpi_table_hest_ia_corrected { struct acpi_hest_ia_nmi { struct acpi_hest_header header; u32 reserved; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 max_raw_data_length; }; @@ -544,9 +544,9 @@ struct acpi_hest_aer { struct acpi_hest_aer_bridge { struct acpi_hest_header header; struct acpi_hest_aer_common aer; - u32 second_uncorrectable_error_mask; - u32 second_uncorrectable_error_severity; - u32 second_advanced_capabilities; + u32 uncorrectable_mask2; + u32 uncorrectable_severity2; + u32 advanced_capabilities2; }; /* 9: Generic Hardware Error Source */ @@ -556,7 +556,7 @@ struct acpi_hest_generic { u16 related_source_id; u8 reserved; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 max_raw_data_length; struct acpi_generic_address error_status_address; -- cgit v1.2.3 From 1872bbc94b2d092ece22a8fbf1c3e81f0fba0052 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 13 Aug 2009 13:31:00 +0800 Subject: ACPICA: Fix typo for HEST ACPI table Problem with the name of one of the subtables. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 34b10c06bcfd..0b9b430b092b 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -500,7 +500,7 @@ struct acpi_hest_ia_machine_check { /* 1: IA32 Corrected Machine Check */ -struct acpi_table_hest_ia_corrected { +struct acpi_hest_ia_corrected { struct acpi_hest_header header; u16 reserved1; u8 flags; -- cgit v1.2.3 From 49ae80c9944401222e47108883c486b5a5a24006 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 13 Aug 2009 13:43:12 +0800 Subject: ACPICA: Update version to 20090730 Version 20090730. 
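[ Since ACPI_CA_VERSION is in YYYYMMDD format, external code can compare releases numerically; a hedged sketch -- the HAVE_* macro is made up, only the define and its new value come from the diff below. ]

#include <acpi/acpixf.h>

#if ACPI_CA_VERSION >= 0x20090730
/* first ACPICA release carrying the shortened HEST identifiers above */
#define HAVE_SHORT_HEST_IDENTIFIERS 1
#endif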
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 063e577e791e..f3b358b7432f 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090625 +#define ACPI_CA_VERSION 0x20090730 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3 From a192a9580bcc41692be1f36b77c3b681827f566a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Jul 2009 16:45:54 -0400 Subject: ACPI: Move definition of PREFIX from acpi_bus.h to internal.h Linux/ACPI core files using internal.h all use PREFIX "ACPI: ", however, not all ACPI drivers use/want it -- and they should not have to #undef PREFIX to define their own. Add GPL comment to internal.h while we are there. This does not change any actual console output, aside from a whitespace fix. Signed-off-by: Len Brown --- arch/x86/pci/mmconfig-shared.c | 2 ++ drivers/acpi/ac.c | 2 ++ drivers/acpi/battery.c | 2 ++ drivers/acpi/blacklist.c | 2 ++ drivers/acpi/button.c | 2 ++ drivers/acpi/cm_sbs.c | 2 ++ drivers/acpi/container.c | 2 ++ drivers/acpi/dock.c | 2 ++ drivers/acpi/ec.c | 1 - drivers/acpi/event.c | 2 ++ drivers/acpi/fan.c | 2 ++ drivers/acpi/glue.c | 2 ++ drivers/acpi/internal.h | 22 +++++++++++++++++++++- drivers/acpi/numa.c | 2 ++ drivers/acpi/pci_irq.c | 2 ++ drivers/acpi/pci_link.c | 2 ++ drivers/acpi/pci_root.c | 2 ++ drivers/acpi/power.c | 2 ++ drivers/acpi/processor_core.c | 2 ++ drivers/acpi/processor_idle.c | 2 ++ drivers/acpi/processor_perflib.c | 2 ++ drivers/acpi/processor_thermal.c | 2 ++ drivers/acpi/processor_throttling.c | 2 ++ drivers/acpi/sbs.c | 2 ++ drivers/acpi/sbshc.c | 2 ++ drivers/acpi/system.c | 2 ++ drivers/acpi/thermal.c | 2 ++ drivers/acpi/utils.c | 2 ++ drivers/acpi/video.c | 2 ++ drivers/acpi/video_detect.c | 2 ++ drivers/pci/dmar.c | 3 +-- drivers/platform/x86/fujitsu-laptop.c | 4 ++-- drivers/platform/x86/wmi.c | 1 - include/acpi/acpi_bus.h | 2 -- 34 files changed, 80 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 712443ec6d43..81d3466765ca 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -18,6 +18,8 @@ #include #include +#define PREFIX "ACPI: " + /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) #define MMCONFIG_APER_MAX (256 * 1024*1024) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 0df8fcb687d6..98b9690b0159 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -37,6 +37,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_AC_CLASS "ac_adapter" #define ACPI_AC_DEVICE_NAME "AC Adapter" #define ACPI_AC_FILE_STATE "state" diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 58b4517ce712..f8c3d1bb6969 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -45,6 +45,8 @@ #include #endif +#define PREFIX "ACPI: " + #define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index f6baa77deefb..19152ea2b104 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -34,6 +34,8 @@ #include #include +#include "internal.h" + enum acpi_blacklist_predicates { all_versions, less_than_or_equal, diff --git
a/drivers/acpi/button.c b/drivers/acpi/button.c index 9195deba9d94..d295bdccc09c 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -33,6 +33,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_BUTTON_CLASS "button" #define ACPI_BUTTON_FILE_INFO "info" #define ACPI_BUTTON_FILE_STATE "state" diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c index 332fe4b21708..6c9ee68e46fb 100644 --- a/drivers/acpi/cm_sbs.c +++ b/drivers/acpi/cm_sbs.c @@ -28,6 +28,8 @@ #include #include +#define PREFIX "ACPI: " + ACPI_MODULE_NAME("cm_sbs"); #define ACPI_AC_CLASS "ac_adapter" #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index fe0cdf83641a..5f2c3c00a315 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -35,6 +35,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_CONTAINER_DEVICE_NAME "ACPI container device" #define ACPI_CONTAINER_CLASS "container" diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index efb959d6c8a9..9a855669ff12 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -33,6 +33,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_DOCK_DRIVER_DESCRIPTION "ACPI Dock Station Driver" ACPI_MODULE_NAME("dock"); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 391f331674c7..5180f0f1dd02 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -47,7 +47,6 @@ #define ACPI_EC_DEVICE_NAME "Embedded Controller" #define ACPI_EC_FILE_INFO "info" -#undef PREFIX #define PREFIX "ACPI: EC: " /* EC status register */ diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index aeb7e5fb4a04..c511071bfd79 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -14,6 +14,8 @@ #include #include +#include "internal.h" + #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("event"); diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 53698ea08371..f419849a0d3f 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -34,6 +34,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_FAN_CLASS "fan" #define ACPI_FAN_FILE_STATE "state" diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index a8a5c29958c8..dc36a448de43 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -12,6 +12,8 @@ #include #include +#include "internal.h" + #define ACPI_GLUE_DEBUG 0 #if ACPI_GLUE_DEBUG #define DBG(x...) printk(PREFIX x) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 11a69b53004e..074cf8682d52 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -1,4 +1,24 @@ -/* For use by Linux/ACPI infrastructure, not drivers */ +/* + * acpi/internal.h + * For use by Linux/ACPI infrastructure, not drivers + * + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define PREFIX "ACPI: " int init_acpi_device_notify(void); int acpi_scan_init(void); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index d440ccd27d91..202dd0c976a3 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -30,6 +30,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_NUMA 0x80000000 #define _COMPONENT ACPI_NUMA ACPI_MODULE_NAME("numa"); diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index b794eb88ab90..843699ed93f2 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -40,6 +40,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_irq"); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 16e0f9d3d17c..394ae89409c2 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -43,6 +43,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_link"); #define ACPI_PCI_LINK_CLASS "pci_irq_routing" diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 55b5b90c2a44..dee916707a7d 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -36,6 +36,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_root"); #define ACPI_PCI_ROOT_CLASS "pci_bridge" diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index d74365d4a6e7..e86603f37dee 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -44,6 +44,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_POWER_COMPONENT ACPI_MODULE_NAME("power"); #define ACPI_POWER_CLASS "power_resource" diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 2cc4b3033872..b4a1ab297e7b 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -59,6 +59,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define ACPI_PROCESSOR_DEVICE_NAME "Processor" #define ACPI_PROCESSOR_FILE_INFO "info" diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 66393d5c4c7c..22aab1fc9b45 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -60,6 +60,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_idle"); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 60e543d3234e..11088cf10319 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -39,6 +39,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define ACPI_PROCESSOR_FILE_PERFORMANCE "performance" #define _COMPONENT ACPI_PROCESSOR_COMPONENT diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 31adda1099e0..3e3181c0efc3 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -40,6 +40,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_thermal"); diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index ae39797aab55..b366b9c13d4d 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -41,6 +41,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT 
ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_throttling"); diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 4b214b74ebaa..52b9db8afc20 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -46,6 +46,8 @@ #include "sbshc.h" +#define PREFIX "ACPI: " + #define ACPI_SBS_CLASS "sbs" #define ACPI_AC_CLASS "ac_adapter" #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 0619734895b2..d9339806df45 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -15,6 +15,8 @@ #include #include "sbshc.h" +#define PREFIX "ACPI: " + #define ACPI_SMB_HC_CLASS "smbus_host_controller" #define ACPI_SMB_HC_DEVICE_NAME "ACPI SMBus HC" diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 9c61ab2177cf..d11282975f35 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -31,6 +31,8 @@ #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("system"); diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 564ea1424288..65f67815902a 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -47,6 +47,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_THERMAL_CLASS "thermal_zone" #define ACPI_THERMAL_DEVICE_NAME "Thermal Zone" #define ACPI_THERMAL_FILE_STATE "state" diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index f844941089bb..811fec10462b 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -30,6 +30,8 @@ #include #include +#include "internal.h" + #define _COMPONENT ACPI_BUS_COMPONENT ACPI_MODULE_NAME("utils"); diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 8851315ce858..a0fa3946b507 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -44,6 +44,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_VIDEO_CLASS "video" #define ACPI_VIDEO_BUS_NAME "Video Bus" #define ACPI_VIDEO_DEVICE_NAME "Video Device" diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 7cd2b63435ea..7032f25da9b5 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -38,6 +38,8 @@ #include #include +#define PREFIX "ACPI: " + ACPI_MODULE_NAME("video"); #define _COMPONENT ACPI_VIDEO_COMPONENT diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 7b287cb38b7a..998f02d2ba42 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -34,8 +34,7 @@ #include #include -#undef PREFIX -#define PREFIX "DMAR:" +#define PREFIX "DMAR: " /* No locks are needed as DMA remapping hardware unit * list is constructed at boot time and hotplug of diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 218b9a16ac3f..eabddc9c192b 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -700,7 +700,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) goto end; } - printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + printk(KERN_INFO "ACPI: %s [%s] (%s)\n", acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); @@ -874,7 +874,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) goto end; } - printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + printk(KERN_INFO "ACPI: %s [%s] (%s)\n", acpi_device_name(device), acpi_device_bid(device), !device->power.state ? 
"on" : "off"); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index f215a5919192..177f8d767df4 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -42,7 +42,6 @@ MODULE_LICENSE("GPL"); #define ACPI_WMI_CLASS "wmi" -#undef PREFIX #define PREFIX "ACPI: WMI: " static DEFINE_MUTEX(wmi_data_lock); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..f485107ddc43 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -30,8 +30,6 @@ #include -#define PREFIX "ACPI: " - /* TBD: Make dynamic */ #define ACPI_MAX_HANDLES 10 struct acpi_handle_list { -- cgit v1.2.3 From e55a5999ffcf72dc4d43d73618957964cb87065a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 28 Jul 2009 17:41:53 +0800 Subject: ACPI: Handle CONFIG_ACPI=n better from linux/acpi.h linux/acpi.h is the top level header for interfacing with the ACPI sub-system, so acpi_disabled should be up there instead of down in asm/acpi.h -- particularly since asm/acpi.h doesn't exist for all architectures. Same story for acpi_table_parse(), which is a top-level API to Linux/ACPI. This is necessary for building some code that used to always depend on CONFIG_ACPI=y, but will soon also need to build with CONFIG_ACPI=n. Signed-off-by: Feng Tang Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 1 - include/linux/acpi.h | 11 ++++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 20d1465a2ab0..4518dc500903 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) #else /* !CONFIG_ACPI */ -#define acpi_disabled 1 #define acpi_lapic 0 #define acpi_ioapic 0 static inline void acpi_noirq_set(void) { } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 34321cfffeab..3fce811bf9ac 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -292,7 +292,10 @@ void __init acpi_s4_no_nvs(void); extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); extern void acpi_early_init(void); -#else /* CONFIG_ACPI */ +#else /* !CONFIG_ACPI */ + +#define acpi_disabled 1 + static inline void acpi_early_init(void) { } static inline int early_acpi_boot_init(void) @@ -331,5 +334,11 @@ static inline int acpi_check_mem_region(resource_size_t start, return 0; } +struct acpi_table_header; +static inline int acpi_table_parse(char *id, + int (*handler)(struct acpi_table_header *)) +{ + return -1; +} #endif /* !CONFIG_ACPI */ #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 117a9ac777f8034d4675b821172d2ff71f6ec47a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:10:24 -0400 Subject: SFI: create linux/sfi.h include/linux/include/sfi.h defines everything that customers of SFI need to know in order to use the SFI suport in the kernel. The primary API is sfi_table_parse(), where a driver or another part of the kernel can supply a handler to parse the named table. sfi.h also includes the currently defined table signatures and table formats. 
Signed-off-by: Feng Tang Signed-off-by: Len Brown --- include/linux/sfi.h | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 include/linux/sfi.h (limited to 'include') diff --git a/include/linux/sfi.h b/include/linux/sfi.h new file mode 100644 index 000000000000..9a6f7607174e --- /dev/null +++ b/include/linux/sfi.h @@ -0,0 +1,206 @@ +/* sfi.h Simple Firmware Interface */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef _LINUX_SFI_H +#define _LINUX_SFI_H + +/* Table signatures reserved by the SFI specification */ +#define SFI_SIG_SYST "SYST" +#define SFI_SIG_FREQ "FREQ" +#define SFI_SIG_IDLE "IDLE" +#define SFI_SIG_CPUS "CPUS" +#define SFI_SIG_MTMR "MTMR" +#define SFI_SIG_MRTC "MRTC" +#define SFI_SIG_MMAP "MMAP" +#define SFI_SIG_APIC "APIC" +#define SFI_SIG_XSDT "XSDT" +#define SFI_SIG_WAKE "WAKE" +#define SFI_SIG_SPIB "SPIB" +#define SFI_SIG_I2CB "I2CB" +#define SFI_SIG_GPEM "GPEM" + +#define SFI_SIGNATURE_SIZE 4 +#define SFI_OEM_ID_SIZE 6 +#define SFI_OEM_TABLE_ID_SIZE 8 + +#define SFI_SYST_SEARCH_BEGIN 0x000E0000 +#define SFI_SYST_SEARCH_END 0x000FFFFF + +#define SFI_GET_NUM_ENTRIES(ptable, entry_type) \ + ((ptable->header.len - sizeof(struct sfi_table_header)) / \ + (sizeof(entry_type))) +/* + * Table structures must be byte-packed to match the SFI specification, + * as they are provided by the BIOS. + */ +struct sfi_table_header { + char sig[SFI_SIGNATURE_SIZE]; + u32 len; + u8 rev; + u8 csum; + char oem_id[SFI_OEM_ID_SIZE]; + char oem_table_id[SFI_OEM_TABLE_ID_SIZE]; +} __packed; + +struct sfi_table_simple { + struct sfi_table_header header; + u64 pentry[1]; +} __packed; + +/* Comply with UEFI spec 2.1 */ +struct sfi_mem_entry { + u32 type; + u64 phys_start; + u64 virt_start; + u64 pages; + u64 attrib; +} __packed; + +struct sfi_cpu_table_entry { + u32 apic_id; +} __packed; + +struct sfi_cstate_table_entry { + u32 hint; /* MWAIT hint */ + u32 latency; /* latency in ms */ +} __packed; + +struct sfi_apic_table_entry { + u64 phys_addr; /* phy base addr for APIC reg */ +} __packed; + +struct sfi_freq_table_entry { + u32 freq_mhz; /* in MHZ */ + u32 latency; /* transition latency in ms */ + u32 ctrl_val; /* value to write to PERF_CTL */ +} __packed; + +struct sfi_wake_table_entry { + u64 phys_addr; /* pointer to where the wake vector locates */ +} __packed; + +struct sfi_timer_table_entry { + u64 phys_addr; /* phy base addr for the timer */ + u32 freq_hz; /* in HZ */ + u32 irq; +} __packed; + +struct sfi_rtc_table_entry { + u64 phys_addr; /* phy base addr for the RTC */ + u32 irq; +} __packed; + +struct sfi_spi_table_entry { + u16 host_num; /* attached to host 0, 1...*/ + u16 cs; /* chip select */ + u16 irq_info; + char name[16]; + u8 dev_info[10]; +} __packed; + +struct sfi_i2c_table_entry { + u16 host_num; + u16 addr; /* slave addr */ + u16 irq_info; + char name[16]; + u8 dev_info[10]; +} __packed; + +struct sfi_gpe_table_entry { + u16 logical_id; /* logical id */ + u16 phys_id; /* physical GPE id */ +} __packed; + + +typedef int (*sfi_table_handler) (struct sfi_table_header *table); + +#ifdef CONFIG_SFI +extern void __init sfi_init(void); +extern int __init sfi_platform_init(void); +extern void __init sfi_init_late(void); +extern int sfi_table_parse(char *signature, char *oem_id, char *oem_table_id, + sfi_table_handler handler); + +extern int sfi_disabled; +static inline void disable_sfi(void) +{ + sfi_disabled = 1; +} + +#else /* !CONFIG_SFI */ + +static inline void sfi_init(void) +{ +} + +static inline void sfi_init_late(void) +{ +} + +#define sfi_disabled 0 + +static inline int sfi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + sfi_table_handler handler) +{ + return -1; +} + +#endif /* !CONFIG_SFI */ + +#endif /*_LINUX_SFI_H*/ -- cgit v1.2.3 From 13e82d023c4c3f13ab1e665cbb917a7ebba8935c Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:17:53 -0400 Subject: SFI: add capability to parse ACPI tables Extend SFI to access standard ACPI tables. (eg. 
the PCI MCFG) using sfi_acpi_table_parse(). Note that this is _not_ a hybrid ACPI + SFI mode. The platform boots in either ACPI mode or SFI mode. SFI runs only with acpi_disabled=1, which can be set at build-time via CONFIG_ACPI=n, or at boot time by the failure to find ACPI platform support. So this extension simply allows SFI-platforms to re-use existing standard table formats that happen to be defined to live in ACPI envelopes. Signed-off-by: Feng Tang Signed-off-by: Len Brown --- drivers/sfi/sfi_acpi.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sfi_acpi.h | 93 +++++++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 drivers/sfi/sfi_acpi.c create mode 100644 include/linux/sfi_acpi.h (limited to 'include') diff --git a/drivers/sfi/sfi_acpi.c b/drivers/sfi/sfi_acpi.c new file mode 100644 index 000000000000..34aba30eb84b --- /dev/null +++ b/drivers/sfi/sfi_acpi.c @@ -0,0 +1,175 @@ +/* sfi_acpi.c Simple Firmware Interface - ACPI extensions */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#define KMSG_COMPONENT "SFI" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include + +#include +#include "sfi_core.h" + +/* + * SFI can access ACPI-defined tables via an optional ACPI XSDT. + * + * This allows re-use, and avoids re-definition, of standard tables. + * For example, the "MCFG" table is defined by PCI, reserved by ACPI, + * and is expected to be present many SFI-only systems. + */ + +static struct acpi_table_xsdt *xsdt_va __read_mostly; + +#define XSDT_GET_NUM_ENTRIES(ptable, entry_type) \ + ((ptable->header.length - sizeof(struct acpi_table_header)) / \ + (sizeof(entry_type))) + +static inline struct sfi_table_header *acpi_to_sfi_th( + struct acpi_table_header *th) +{ + return (struct sfi_table_header *)th; +} + +static inline struct acpi_table_header *sfi_to_acpi_th( + struct sfi_table_header *th) +{ + return (struct acpi_table_header *)th; +} + +/* + * sfi_acpi_parse_xsdt() + * + * Parse the ACPI XSDT for later access by sfi_acpi_table_parse(). + */ +static int __init sfi_acpi_parse_xsdt(struct sfi_table_header *th) +{ + struct sfi_table_key key = SFI_ANY_KEY; + int tbl_cnt, i; + void *ret; + + xsdt_va = (struct acpi_table_xsdt *)th; + tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64); + for (i = 0; i < tbl_cnt; i++) { + ret = sfi_check_table(xsdt_va->table_offset_entry[i], &key); + if (IS_ERR(ret)) { + disable_sfi(); + return -1; + } + } + + return 0; +} + +int __init sfi_acpi_init(void) +{ + struct sfi_table_key xsdt_key = { .sig = SFI_SIG_XSDT }; + + sfi_table_parse(SFI_SIG_XSDT, NULL, NULL, sfi_acpi_parse_xsdt); + + /* Only call the get_table to keep the table mapped */ + xsdt_va = (struct acpi_table_xsdt *)sfi_get_table(&xsdt_key); + return 0; +} + +static struct acpi_table_header *sfi_acpi_get_table(struct sfi_table_key *key) +{ + u32 tbl_cnt, i; + void *ret; + + tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64); + for (i = 0; i < tbl_cnt; i++) { + ret = sfi_check_table(xsdt_va->table_offset_entry[i], key); + if (!IS_ERR(ret) && ret) + return sfi_to_acpi_th(ret); + } + + return NULL; +} + +static void sfi_acpi_put_table(struct acpi_table_header *table) +{ + sfi_put_table(acpi_to_sfi_th(table)); +} + +/* + * sfi_acpi_table_parse() + * + * Find specified table in XSDT, run handler on it and return its return value + */ +int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id, + int(*handler)(struct acpi_table_header *)) +{ + struct acpi_table_header *table = NULL; + struct sfi_table_key key; + int ret = 0; + + if (sfi_disabled) + return -1; + + key.sig = signature; + key.oem_id = oem_id; + key.oem_table_id = oem_table_id; + + table = sfi_acpi_get_table(&key); + if (!table) + return -EINVAL; + + ret = handler(table); + sfi_acpi_put_table(table); + return ret; +} diff --git a/include/linux/sfi_acpi.h b/include/linux/sfi_acpi.h new file mode 100644 index 000000000000..c4a5a8cd4469 --- /dev/null +++ b/include/linux/sfi_acpi.h @@ -0,0 +1,93 @@ +/* sfi.h Simple Firmware Interface */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef _LINUX_SFI_ACPI_H +#define _LINUX_SFI_ACPI_H + +#ifdef CONFIG_SFI +#include /* struct acpi_table_header */ + +extern int sfi_acpi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + int (*handler)(struct acpi_table_header *)); + +static inline int acpi_sfi_table_parse(char *signature, + int (*handler)(struct acpi_table_header *)) +{ + if (!acpi_table_parse(signature, handler)) + return 0; + + return sfi_acpi_table_parse(signature, NULL, NULL, handler); +} +#else /* !CONFIG_SFI */ + +static inline int sfi_acpi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + int (*handler)(struct acpi_table_header *)) +{ + return -1; +} + +static inline int acpi_sfi_table_parse(char *signature, + int (*handler)(struct acpi_table_header *)) +{ + return acpi_table_parse(signature, handler); +} +#endif /* !CONFIG_SFI */ + +#endif /*_LINUX_SFI_ACPI_H*/ -- cgit v1.2.3 From 2b022e3d4bf9885f781221c59d86283a2cdfc2ed Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:48:59 +0800 Subject: timers: Add tracepoints for timer_list timers Add tracepoints which cover the timer life cycle. The tracepoints are integrated with the already existing debug_object debug points as far as possible. 
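[ A usage sketch: a throwaway module whose timer fires each of the new events once the timer trace events are enabled (typically under /sys/kernel/debug/tracing/events/timer/); every name here is hypothetical, only the tracepoint semantics come from this patch. ]

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
	/* runs between timer_expire_entry and timer_expire_exit */
	pr_info("demo timer fired\n");
}

static int __init demo_init(void)
{
	setup_timer(&demo_timer, demo_timer_fn, 0);	/* -> timer_init */
	mod_timer(&demo_timer, jiffies + HZ);		/* -> timer_start */
	return 0;
}

static void __exit demo_exit(void)
{
	del_timer_sync(&demo_timer);			/* -> timer_cancel */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");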
Based on patches from Mathieu: http://marc.info/?l=linux-kernel&m=123791201816247&w=2 and Anton: http://marc.info/?l=linux-kernel&m=124331396919301&w=2 [ tglx: Fixed timeout value in timer_start tracepoint, massaged comments and made the printk's more readable ] Signed-off-by: Xiao Guangrong Cc: Anton Blanchard Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8A9B.3040201@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 137 +++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 32 ++++++++-- 2 files changed, 165 insertions(+), 4 deletions(-) create mode 100644 include/trace/events/timer.h (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h new file mode 100644 index 000000000000..725892a93b49 --- /dev/null +++ b/include/trace/events/timer.h @@ -0,0 +1,137 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM timer + +#if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TIMER_H + +#include +#include + +/** + * timer_init - called when the timer is initialized + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_init, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_start - called when the timer is started + * @timer: pointer to struct timer_list + * @expires: the timers expiry time + */ +TRACE_EVENT(timer_start, + + TP_PROTO(struct timer_list *timer, unsigned long expires), + + TP_ARGS(timer, expires), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( unsigned long, expires ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = expires; + __entry->now = jiffies; + ), + + TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + __entry->timer, __entry->function, __entry->expires, + (long)__entry->expires - __entry->now) +); + +/** + * timer_expire_entry - called immediately before the timer callback + * @timer: pointer to struct timer_list + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(timer_expire_entry, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = jiffies; + ), + + TP_printk("timer %p: now %lu", __entry->timer, __entry->now) +); + +/** + * timer_expire_exit - called immediately after the timer callback returns + * @timer: pointer to struct timer_list + * + * When used in combination with the timer_expire_entry tracepoint we can + * determine the runtime of the timer callback function. + * + * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might + * be invalid. We solely track the pointer. 
+ */ +TRACE_EVENT(timer_expire_exit, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field(void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_cancel - called when the timer is canceled + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_cancel, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +#endif /* _TRACE_TIMER_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/timer.c b/kernel/timer.c index 8e92be654dad..a7352b00703c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -46,6 +46,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); @@ -521,6 +524,25 @@ static inline void debug_timer_activate(struct timer_list *timer) { } static inline void debug_timer_deactivate(struct timer_list *timer) { } #endif +static inline void debug_init(struct timer_list *timer) +{ + debug_timer_init(timer); + trace_timer_init(timer); +} + +static inline void +debug_activate(struct timer_list *timer, unsigned long expires) +{ + debug_timer_activate(timer); + trace_timer_start(timer, expires); +} + +static inline void debug_deactivate(struct timer_list *timer) +{ + debug_timer_deactivate(timer); + trace_timer_cancel(timer); +} + static void __init_timer(struct timer_list *timer, const char *name, struct lock_class_key *key) @@ -549,7 +571,7 @@ void init_timer_key(struct timer_list *timer, const char *name, struct lock_class_key *key) { - debug_timer_init(timer); + debug_init(timer); __init_timer(timer, name, key); } EXPORT_SYMBOL(init_timer_key); @@ -568,7 +590,7 @@ static inline void detach_timer(struct timer_list *timer, { struct list_head *entry = &timer->entry; - debug_timer_deactivate(timer); + debug_deactivate(timer); __list_del(entry->prev, entry->next); if (clear_pending) @@ -632,7 +654,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, goto out_unlock; } - debug_timer_activate(timer); + debug_activate(timer, expires); new_base = __get_cpu_var(tvec_bases); @@ -787,7 +809,7 @@ void add_timer_on(struct timer_list *timer, int cpu) BUG_ON(timer_pending(timer) || !timer->function); spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); - debug_timer_activate(timer); + debug_activate(timer, timer->expires); if (time_before(timer->expires, base->next_timer) && !tbase_get_deferrable(timer->base)) base->next_timer = timer->expires; @@ -1000,7 +1022,9 @@ static inline void __run_timers(struct tvec_base *base) */ lock_map_acquire(&lockdep_map); + trace_timer_expire_entry(timer); fn(data); + trace_timer_expire_exit(timer); lock_map_release(&lockdep_map); -- cgit v1.2.3 From c6a2a1770245f654f35f60e1458d4356680f9519 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:51:23 +0800 Subject: hrtimer: Add tracepoint for hrtimers Add tracepoints which cover the life cycle of a hrtimer. The tracepoints are integrated with the already existing debug_object debug points as far as possible. [ tglx: Fixed comments, made output consistent, easier to read and parse. Fixed output for 32bit archs which do not use the scalar representation of ktime_t. Hand current time to trace_hrtimer_expiry_entry instead of calling get_time() inside of the trace assignment.
] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B2B.5020908@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 139 +++++++++++++++++++++++++++++++++++++++++++ kernel/hrtimer.c | 40 ++++++++++--- 2 files changed, 171 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 725892a93b49..df3c07fa0cb8 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -5,6 +5,7 @@ #define _TRACE_TIMER_H #include +#include #include /** @@ -131,6 +132,144 @@ TRACE_EVENT(timer_cancel, TP_printk("timer %p", __entry->timer) ); +/** + * hrtimer_init - called when the hrtimer is initialized + * @timer: pointer to struct hrtimer + * @clockid: the hrtimers clock + * @mode: the hrtimers mode + */ +TRACE_EVENT(hrtimer_init, + + TP_PROTO(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode), + + TP_ARGS(timer, clockid, mode), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( clockid_t, clockid ) + __field( enum hrtimer_mode, mode ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->clockid = clockid; + __entry->mode = mode; + ), + + TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + __entry->clockid == CLOCK_REALTIME ? + "CLOCK_REALTIME" : "CLOCK_MONOTONIC", + __entry->mode == HRTIMER_MODE_ABS ? + "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") +); + +/** + * hrtimer_start - called when the hrtimer is started + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_start, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( s64, expires ) + __field( s64, softexpires ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = hrtimer_get_expires(timer).tv64; + __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + ), + + TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", + __entry->timer, __entry->function, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->expires }), + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->softexpires })) +); + +/** + * htimmer_expire_entry - called immediately before the hrtimer callback + * @timer: pointer to struct hrtimer + * @now: pointer to variable which contains current time of the + * timers base. + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(hrtimer_expire_entry, + + TP_PROTO(struct hrtimer *timer, ktime_t *now), + + TP_ARGS(timer, now), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( s64, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = now->tv64; + ), + + TP_printk("hrtimer %p, now %llu", __entry->timer, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->now })) + ); + +/** + * hrtimer_expire_exit - called immediately after the hrtimer callback returns + * @timer: pointer to struct hrtimer + * + * When used in combination with the hrtimer_expire_entry tracepoint we can + * determine the runtime of the callback function. 
+ */ +TRACE_EVENT(hrtimer_expire_exit, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + +/** + * hrtimer_cancel - called when the hrtimer is canceled + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_cancel, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e2f91ecc01a8..b44d1b07377b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -48,6 +48,8 @@ #include +#include + /* * The timer bases: * @@ -441,6 +443,26 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif +static inline void +debug_init(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode) +{ + debug_hrtimer_init(timer); + trace_hrtimer_init(timer, clockid, mode); +} + +static inline void debug_activate(struct hrtimer *timer) +{ + debug_hrtimer_activate(timer); + trace_hrtimer_start(timer); +} + +static inline void debug_deactivate(struct hrtimer *timer) +{ + debug_hrtimer_deactivate(timer); + trace_hrtimer_cancel(timer); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -797,7 +819,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, struct hrtimer *entry; int leftmost = 1; - debug_hrtimer_activate(timer); + debug_activate(timer); /* * Find the right place in the rbtree: @@ -883,7 +905,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, @@ -1116,7 +1138,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { - debug_hrtimer_init(timer); + debug_init(timer, clock_id, mode); __hrtimer_init(timer, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init); @@ -1140,7 +1162,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); -static void __run_hrtimer(struct hrtimer *timer) +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) { struct hrtimer_clock_base *base = timer->base; struct hrtimer_cpu_base *cpu_base = base->cpu_base; @@ -1149,7 +1171,7 @@ static void __run_hrtimer(struct hrtimer *timer) WARN_ON(!irqs_disabled()); - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); fn = timer->function; @@ -1160,7 +1182,9 @@ static void __run_hrtimer(struct hrtimer *timer) * the timer base. 
*/ spin_unlock(&cpu_base->lock); + trace_hrtimer_expire_entry(timer, now); restart = fn(timer); + trace_hrtimer_expire_exit(timer); spin_lock(&cpu_base->lock); /* @@ -1271,7 +1295,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) break; } - __run_hrtimer(timer); + __run_hrtimer(timer, &basenow); } base++; } @@ -1393,7 +1417,7 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - __run_hrtimer(timer); + __run_hrtimer(timer, &base->softirq_time); } spin_unlock(&cpu_base->lock); } @@ -1569,7 +1593,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); /* * Mark it as STATE_MIGRATE not INACTIVE otherwise the -- cgit v1.2.3 From 3f0a525ebf4b8ef041a332bbe4a73aee94bb064b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:52:30 +0800 Subject: itimers: Add tracepoints for itimer Add tracepoints for all itimer variants: ITIMER_REAL, ITIMER_VIRTUAL and ITIMER_PROF. [ tglx: Fixed comments and made the output more readable, parseable and consistent. Replaced pid_vnr by pid_nr because the hrtimer callback can happen in any namespace ] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B6E.2010109@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 66 ++++++++++++++++++++++++++++++++++++++++++++ kernel/itimer.c | 5 ++++ kernel/posix-cpu-timers.c | 7 ++++- 3 files changed, 77 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index df3c07fa0cb8..1844c48d640e 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -270,6 +270,72 @@ TRACE_EVENT(hrtimer_cancel, TP_printk("hrtimer %p", __entry->timer) ); +/** + * itimer_state - called when itimer is started or canceled + * @which: name of the interval timer + * @value: the itimers value, itimer is canceled if value->it_value is + * zero, otherwise it is started + * @expires: the itimers expiry time + */ +TRACE_EVENT(itimer_state, + + TP_PROTO(int which, const struct itimerval *const value, + cputime_t expires), + + TP_ARGS(which, value, expires), + + TP_STRUCT__entry( + __field( int, which ) + __field( cputime_t, expires ) + __field( long, value_sec ) + __field( long, value_usec ) + __field( long, interval_sec ) + __field( long, interval_usec ) + ), + + TP_fast_assign( + __entry->which = which; + __entry->expires = expires; + __entry->value_sec = value->it_value.tv_sec; + __entry->value_usec = value->it_value.tv_usec; + __entry->interval_sec = value->it_interval.tv_sec; + __entry->interval_usec = value->it_interval.tv_usec; + ), + + TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + __entry->which, __entry->expires, + __entry->value_sec, __entry->value_usec, + __entry->interval_sec, __entry->interval_usec) +); + +/** + * itimer_expire - called when itimer expires + * @which: type of the interval timer + * @pid: pid of the process which owns the timer + * @now: current time, used to calculate the latency of itimer + */ +TRACE_EVENT(itimer_expire, + + TP_PROTO(int which, struct pid *pid, cputime_t now), + + TP_ARGS(which, pid, now), + + TP_STRUCT__entry( + __field( int , which ) + __field( pid_t, pid ) + __field( 
cputime_t, now )
+	),
+
+	TP_fast_assign(
+		__entry->which = which;
+		__entry->now = now;
+		__entry->pid = pid_nr(pid);
+	),
+
+	TP_printk("which %d, pid %d, now %lu", __entry->which,
+		  (int) __entry->pid, __entry->now)
+);
+
 #endif /*  _TRACE_TIMER_H */
 
 /* This part must be outside protection */
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 8078a32d3b10..b03451ede528 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <trace/events/timer.h>
 
 #include
@@ -122,6 +123,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 	struct signal_struct *sig =
 		container_of(timer, struct signal_struct, real_timer);
 
+	trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
 	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
 
 	return HRTIMER_NORESTART;
@@ -166,6 +168,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 	}
 	it->expires = nval;
 	it->incr = ninterval;
+	trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
+			   ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
 
 	spin_unlock_irq(&tsk->sighand->siglock);
@@ -217,6 +221,7 @@ again:
 		} else
 			tsk->signal->it_real_incr.tv64 = 0;
 
+		trace_itimer_state(ITIMER_REAL, value, 0);
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 12161f74744e..5c9dc228747b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <trace/events/timer.h>
 
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
@@ -1090,9 +1091,13 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 				cputime_one_jiffy);
 			it->error -= onecputick;
 		}
-	} else
+	} else {
 		it->expires = cputime_zero;
+	}
 
+	trace_itimer_expire(signo == SIGPROF ?
+			    ITIMER_PROF : ITIMER_VIRTUAL,
+			    tsk->signal->leader_pid, cur_time);
 	__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 }
-- 
cgit v1.2.3

From 138d15692bf76841f252d4b836a535cf5f9154e9 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy
Date: Fri, 28 Aug 2009 23:29:38 +0400
Subject: ACPICA: Don't switch task when not allowed

Signed-off-by: Alexey Starikovskiy
Signed-off-by: Len Brown
---
 include/acpi/platform/aclinux.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'include')

diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index fcb8e4b159b1..9d7febde10a1 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -149,10 +149,10 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
 #define ACPI_FREE(a) kfree(a)
 
 /* Used within ACPICA to show where it is safe to preempt execution */
-
+#include <linux/hardirq.h>
 #define ACPI_PREEMPTION_POINT() \
 	do { \
-		if (!irqs_disabled()) \
+		if (!in_atomic_preempt_off()) \
 			cond_resched(); \
 	} while (0)
-- 
cgit v1.2.3

From ad283ea4a3ce82cda2efe33163748a397b31b1eb Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Sat, 29 Aug 2009 19:09:26 -0700
Subject: async_tx: add sum check flags

Replace the flat zero_sum_result with a collection of flags to contain
the P (xor) zero-sum result, and the soon to be utilized Q (raid6 reed
solomon syndrome) zero-sum result. Use the SUM_CHECK_ namespace instead
of DMA_ since these flags will be used on non-dma-zero-sum enabled
platforms.
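As an illustration (an editorial sketch, not part of this patch): a client of
the new interface tests the P and Q results independently instead of testing a
flat zero/non-zero word. The helper below is hypothetical and assumes only the
SUM_CHECK_* definitions this patch adds to dmaengine.h:

	/* sketch: report the outcome of a parity-validate operation */
	static void report_sum_check(enum sum_check_flags res)
	{
		if (res & SUM_CHECK_P_RESULT)
			pr_err("P (xor) parity mismatch\n");
		if (res & SUM_CHECK_Q_RESULT)
			pr_err("Q (reed-solomon) parity mismatch\n");
		if (!res)
			pr_debug("parity validated\n");
	}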
Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- arch/arm/include/asm/hardware/iop3xx-adma.h | 5 +++-- arch/arm/mach-iop13xx/include/mach/adma.h | 12 +++++++----- crypto/async_tx/async_xor.c | 4 ++-- drivers/md/raid5.c | 2 +- drivers/md/raid5.h | 5 +++-- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 21 ++++++++++++++++++++- 7 files changed, 37 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 83e6ba338e2c..26eefea02314 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->src[0] = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop3xx_desc_aau *hw_desc = desc->hw_desc; struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err; + return desc_ctrl.zero_result_err << SUM_CHECK_P; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 5722e86f2174..1cd31df8924d 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h @@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->block_fill_data = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; + enum sum_check_flags flags; BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); - if (desc_ctrl.pq_xfer_en) - return byte_count.zero_result_err_q; - else - return byte_count.zero_result_err; + flags = byte_count.zero_result_err_q << SUM_CHECK_Q; + flags |= byte_count.zero_result_err << SUM_CHECK_P; + + return flags; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 1e96c4df7061..78fb7780272a 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -246,7 +246,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) */ struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit) { struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, @@ -304,7 +304,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, async_tx_quiesce(&tx); - *result = page_is_zero(dest, offset, len) ? 
0 : 1; + *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; async_tx_sync_epilog(submit); submit->flags = flags_orig; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7727954cf726..1f2a266f3cf7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2590,7 +2590,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, * we are done. Otherwise update the mismatch count and repair * parity if !MD_RECOVERY_CHECK */ - if (sh->ops.zero_sum_result == 0) + if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) /* parity is correct (on disc, * not in buffer any more) */ diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index e7baabffee86..75f2c6c4cf90 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -2,6 +2,7 @@ #define _RAID5_H #include +#include /* * @@ -215,8 +216,8 @@ struct stripe_head { * @target - STRIPE_OP_COMPUTE_BLK target */ struct stripe_operations { - int target; - u32 zero_sum_result; + int target; + enum sum_check_flags zero_sum_result; } ops; struct r5dev { struct bio req; diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 00cfb637ddf2..3d21a2517518 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -148,7 +148,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6768727d00d7..02447afcebad 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -86,6 +86,25 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), }; +/** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + /** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h @@ -245,7 +264,7 @@ struct dma_device { unsigned int src_cnt, size_t len, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); -- cgit v1.2.3 From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:19:02 -0700 Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx We currently walk the parent chain when waiting for a given tx to complete however this walk may race with the driver cleanup routine. The routines in async_raid6_recov.c may fall back to the synchronous path at any point so we need to be prepared to call async_tx_quiesce() (which calls dma_wait_for_async_tx). 
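For reference, the synchronous-fallback pattern that has to be safe here looks
roughly like the sketch below (editorial illustration, not code from this
patch; it assumes only the async_submit_ctl fields and helpers used elsewhere
in this series):

	/* sketch: wait out any in-flight dependency, then use the CPU */
	static void sync_fallback(struct async_submit_ctl *submit)
	{
		/* may spin in dma_wait_for_async_tx(); with this patch that
		 * wait polls the initial descriptor's cookie instead of
		 * walking ->parent
		 */
		async_tx_quiesce(&submit->depend_tx);

		/* ... perform the operation synchronously ... */

		async_tx_sync_epilog(submit);	/* run completion callback */
	}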
To remove the ->parent walk we guarantee that every time a dependency is attached ->issue_pending() is invoked, then we can simply poll the initial descriptor until completion. This also allows for a lighter weight 'issue pending' implementation as there is no longer a requirement to iterate through all the channels' ->issue_pending() routines as long as operations have been submitted in an ordered chain. async_tx_issue_pending() is added for this case. Signed-off-by: Dan Williams --- crypto/async_tx/async_tx.c | 13 +++++++------ drivers/dma/dmaengine.c | 45 ++++++++++----------------------------------- include/linux/async_tx.h | 23 +++++++++++++++++++++++ 3 files changed, 40 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 6e37ad3f4417..60615fedcf5e 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -77,8 +77,8 @@ static void async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *tx) { - struct dma_chan *chan; - struct dma_device *device; + struct dma_chan *chan = depend_tx->chan; + struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; /* first check to see if we can still append to depend_tx */ @@ -90,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, } spin_unlock_bh(&depend_tx->lock); - if (!intr_tx) + /* attached dependency, flush the parent channel */ + if (!intr_tx) { + device->device_issue_pending(chan); return; - - chan = depend_tx->chan; - device = chan->device; + } /* see if we can schedule an interrupt * otherwise poll for completion @@ -128,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, intr_tx->tx_submit(intr_tx); async_tx_ack(intr_tx); } + device->device_issue_pending(chan); } else { if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) panic("%s: DMA_ERROR waiting for depend_tx\n", diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6781e8f3c064..e002e0e0d055 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -934,49 +934,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init); /* dma_wait_for_async_tx - spin wait for a transaction to complete * @tx: in-flight transaction to wait on - * - * This routine assumes that tx was obtained from a call to async_memcpy, - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped - * and submitted). Walking the parent chain is only meant to cover for DMA - * drivers that do not implement the DMA_INTERRUPT capability and may race with - * the driver's descriptor cleanup routine. 
*/ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { - enum dma_status status; - struct dma_async_tx_descriptor *iter; - struct dma_async_tx_descriptor *parent; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); if (!tx) return DMA_SUCCESS; - WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" - " %s\n", __func__, dma_chan_name(tx->chan)); - - /* poll through the dependency chain, return when tx is complete */ - do { - iter = tx; - - /* find the root of the unsubmitted dependency chain */ - do { - parent = iter->parent; - if (!parent) - break; - else - iter = parent; - } while (parent); - - /* there is a small window for ->parent == NULL and - * ->cookie == -EBUSY - */ - while (iter->cookie == -EBUSY) - cpu_relax(); - - status = dma_sync_wait(iter->chan, iter->cookie); - } while (status == DMA_IN_PROGRESS || (iter != tx)); - - return status; + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); } EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 3d21a2517518..12a2efcbd565 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -83,6 +83,24 @@ struct async_submit_ctl { #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -98,6 +116,11 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * async_tx_find_channel(struct async_submit_ctl *submit, enum dma_transaction_type tx_type, struct page **dst, -- cgit v1.2.3 From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:36 -0700 Subject: async_tx: add support for asynchronous GF multiplication [ Based on an original patch by Yuri Tikhonov ] This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API: async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources. async_syndrome_val() validates the given source buffers against known P and Q values. When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previous generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. 
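Recall that in the GF(2^8) arithmetic used here addition is xor, so a source
that enters a sum twice cancels out (x + x = 0), and a {00} coefficient
multiplies a source out of the Q sum entirely; the worked example below
depends on both properties.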
For example to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken: p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) p' = p + q + q + src4 = p + src4 q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 Note: 4 is the minimum acceptable maxpq otherwise we punt to synchronous-software path. The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients. Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag. Note2: The api supports disabling the generation of P when generating Q, this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source. Cc: H. Peter Anvin Cc: David Woodhouse Signed-off-by: Yuri Tikhonov Signed-off-by: Ilya Yanok Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 3 + arch/arm/mach-iop13xx/setup.c | 2 +- crypto/async_tx/Kconfig | 4 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_pq.c | 388 ++++++++++++++++++++++++++++++++++ crypto/async_tx/async_xor.c | 2 +- drivers/dma/dmaengine.c | 4 + drivers/dma/iop-adma.c | 2 +- include/linux/async_tx.h | 9 + include/linux/dmaengine.h | 87 +++++++- 10 files changed, 493 insertions(+), 9 deletions(-) create mode 100644 crypto/async_tx/async_pq.c (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 6b15e488c0e7..0e48e054d69a 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -64,6 +64,9 @@ xor - xor a series of source buffers and write the result to a xor_val - xor a series of source buffers and set a flag if the result is zero. 
The implementation attempts to prevent writes to memory +pq - generate the p+q (raid6 syndrome) from a series of source buffers +pq_val - validate that a p and or q buffer are in sync with a given series of + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 9800228b71d3..2e7ca0d75f8a 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -506,7 +506,7 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); - dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); + dma_cap_set(DMA_PQ, plat_data->cap_mask); dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); break; diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb39145986..cb6d7314f198 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -14,3 +14,7 @@ config ASYNC_MEMSET tristate select ASYNC_CORE +config ASYNC_PQ + tristate + select ASYNC_CORE + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d52fbc..1b9926588259 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o +obj-$(CONFIG_ASYNC_PQ) += async_pq.o diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c new file mode 100644 index 000000000000..108b21efb499 --- /dev/null +++ b/crypto/async_tx/async_pq.c @@ -0,0 +1,388 @@ +/* + * Copyright(c) 2007 Yuri Tikhonov + * Copyright(c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. 
+ */ +#include +#include +#include +#include +#include + +/** + * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + */ +static struct page *scribble; + +static bool is_raid6_zero_block(struct page *p) +{ + return p == (void *) raid6_empty_zero_page; +} + +/* the struct page *blocks[] parameter passed to async_gen_syndrome() + * and async_syndrome_val() contains the 'P' destination address at + * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] + * + * note: these are macros as they are used as lvalues + */ +#define P(b, d) (b[d-2]) +#define Q(b, d) (b[d-1]) + +/** + * do_async_gen_syndrome - asynchronously calculate P and/or Q + */ +static __async_inline struct dma_async_tx_descriptor * +do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, + const unsigned char *scfs, unsigned int offset, int disks, + size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct dma_device *dma = chan->device; + enum dma_ctrl_flags dma_flags = 0; + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + dma_async_tx_callback cb_param_orig = submit->cb_param; + int src_cnt = disks - 2; + unsigned char coefs[src_cnt]; + unsigned short pq_src_cnt; + dma_addr_t dma_dest[2]; + int src_off = 0; + int idx; + int i; + + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ + if (P(blocks, disks)) + dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (Q(blocks, disks)) + dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + for (i = 0, idx = 0; i < src_cnt; i++) { + if (is_raid6_zero_block(blocks[i])) + continue; + dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, + DMA_TO_DEVICE); + coefs[idx] = scfs[i]; + idx++; + } + src_cnt = idx; + + while (src_cnt > 0) { + submit->flags = flags_orig; + pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); + /* if we are submitting additional pqs, leave the chain open, + * clear the callback parameters, and leave the destination + * buffers mapped + */ + if (src_cnt > pq_src_cnt) { + submit->flags &= ~ASYNC_TX_ACK; + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = NULL; + submit->cb_param = NULL; + } else { + dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + if (cb_fn_orig) + dma_flags |= DMA_PREP_INTERRUPT; + } + + /* Since we have clobbered the src_list we are committed + * to doing this asynchronously. 
Drivers force forward + * progress in case they can not provide a descriptor + */ + for (;;) { + tx = dma->device_prep_dma_pq(chan, dma_dest, + &dma_src[src_off], + pq_src_cnt, + &coefs[src_off], len, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; + + /* drop completed sources */ + src_cnt -= pq_src_cnt; + src_off += pq_src_cnt; + + dma_flags |= DMA_PREP_CONTINUE; + } + + return tx; +} + +/** + * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome + */ +static void +do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + void **srcs; + int i; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) blocks; + + for (i = 0; i < disks; i++) { + if (is_raid6_zero_block(blocks[i])) { + BUG_ON(i > disks - 3); /* P or Q can't be zero */ + srcs[i] = blocks[i]; + } else + srcs[i] = page_address(blocks[i]) + offset; + } + raid6_call.gen_syndrome(disks, len, srcs); + async_tx_sync_epilog(submit); +} + +/** + * async_gen_syndrome - asynchronously calculate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @submit: submission/completion modifiers + * + * General note: This routine assumes a field of GF(2^8) with a + * primitive polynomial of 0x11d and a generator of {02}. + * + * 'disks' note: callers can optionally omit either P or Q (but not + * both) from the calculation by setting blocks[disks-2] or + * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= + * PAGE_SIZE as a temporary buffer of this size is used in the + * synchronous path. 'disks' always accounts for both destination + * buffers. + * + * 'blocks' note: if submit->scribble is NULL then the contents of + * 'blocks' may be overridden + */ +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + int src_cnt = disks - 2; + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &P(blocks, disks), 2, + blocks, src_cnt, len); + struct dma_device *device = chan ? 
chan->device : NULL; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && + (src_cnt <= dma_maxpq(device, 0) || + dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) { + /* run the p+q asynchronously */ + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, + disks, len, dma_src, submit); + } + + /* run the pq synchronously */ + pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + if (!P(blocks, disks)) { + P(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + if (!Q(blocks, disks)) { + Q(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + do_sync_gen_syndrome(blocks, offset, disks, len, submit); + + return NULL; +} +EXPORT_SYMBOL_GPL(async_gen_syndrome); + +/** + * async_syndrome_val - asynchronously validate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set + * @spare: temporary result buffer for the synchronous case + * @submit: submission / completion modifiers + * + * The same notes from async_gen_syndrome apply to the 'blocks', + * and 'disks' parameters of this routine. The synchronous path + * requires a temporary result buffer and submit->scribble to be + * specified. + */ +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int disks, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, + NULL, 0, blocks, disks, + len); + struct dma_device *device = chan ? chan->device : NULL; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = submit->cb_fn ? 
DMA_PREP_INTERRUPT : 0; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks < 4); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && disks <= dma_maxpq(device, 0)) { + struct device *dev = device->dev; + dma_addr_t *pq = &dma_src[disks-2]; + int i; + + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + if (!P(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (!Q(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + for (i = 0; i < disks; i++) + if (likely(blocks[i])) { + BUG_ON(is_raid6_zero_block(blocks[i])); + dma_src[i] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + } + + for (;;) { + tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + disks - 2, + raid6_gfexp, + len, pqres, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + async_tx_submit(chan, tx, submit); + + return tx; + } else { + struct page *p_src = P(blocks, disks); + struct page *q_src = Q(blocks, disks); + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *scribble = submit->scribble; + void *cb_param_orig = submit->cb_param; + void *p, *q, *s; + + pr_debug("%s: (sync) disks: %d len: %zu\n", + __func__, disks, len); + + /* caller must provide a temporary result buffer and + * allow the input parameters to be preserved + */ + BUG_ON(!spare || !scribble); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + /* recompute p and/or q into the temporary buffer and then + * check to see the result matches the current value + */ + tx = NULL; + *pqres = 0; + if (p_src) { + init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, scribble); + tx = async_xor(spare, blocks, offset, disks-2, len, submit); + async_tx_quiesce(&tx); + p = page_address(p_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; + } + + if (q_src) { + P(blocks, disks) = NULL; + Q(blocks, disks) = spare; + init_async_submit(submit, 0, NULL, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, offset, disks, len, submit); + async_tx_quiesce(&tx); + q = page_address(q_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; + } + + /* restore P, Q and submit */ + P(blocks, disks) = p_src; + Q(blocks, disks) = q_src; + + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + submit->flags = flags_orig; + async_tx_sync_epilog(submit); + + return NULL; + } +} +EXPORT_SYMBOL_GPL(async_syndrome_val); + +static int __init async_pq_init(void) +{ + scribble = alloc_page(GFP_KERNEL); + + if (scribble) + return 0; + + pr_err("%s: failed to allocate required spare page\n", __func__); + + return -ENOMEM; +} + +static void __exit async_pq_exit(void) +{ + put_page(scribble); +} + +module_init(async_pq_init); +module_exit(async_pq_exit); + +MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 78fb7780272a..56b5f98da463 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -62,7 +62,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, while (src_cnt) { submit->flags = flags_orig; dma_flags = 0; - xor_src_cnt = min(src_cnt, dma->max_xor); + xor_src_cnt = 
min(src_cnt, (int)dma->max_xor); /* if we are submitting additional xors, leave the chain open, * clear the callback parameters, and leave the destination * buffer mapped diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index e002e0e0d055..cd5673d3043b 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -646,6 +646,10 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_xor); BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 6ff79a672699..4496bc606662 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1257,7 +1257,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " "( %s%s%s%s%s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 12a2efcbd565..e6ce5f004f98 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 02447afcebad..ce010cd991d2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -52,7 +52,7 @@ enum dma_status { enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, + DMA_PQ, DMA_DUAL_XOR, DMA_PQ_UPDATE, DMA_XOR_VAL, @@ -70,20 +70,28 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. 
has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_PREP_PQ_DISABLE_P = (1 << 4), + DMA_PREP_PQ_DISABLE_Q = (1 << 5), + DMA_PREP_CONTINUE = (1 << 6), }; /** @@ -226,6 +234,7 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -234,6 +243,8 @@ struct dma_async_tx_descriptor { * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -248,7 +259,9 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -265,6 +278,14 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -283,6 +304,60 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & 
DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From 0a82a6239beecc95db6e05fe43ee62d16b381d38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: add support for asynchronous RAID6 recovery operations async_raid6_2data_recov() recovers two data disk failures async_raid6_datap_recov() recovers a data disk and the P disk These routines are a port of the synchronous versions found in drivers/md/raid6recov.c. The primary difference is breaking out the xor operations into separate calls to async_xor. Two helper routines are introduced to perform scalar multiplication where needed. async_sum_product() multiplies two sources by scalar coefficients and then sums (xor) the result. async_mult() simply multiplies a single source by a scalar. This implemention also includes, in contrast to the original synchronous-only code, special case handling for the 4-disk and 5-disk array cases. In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. [ Impact: asynchronous raid6 recovery routines for 2data and datap cases ] Cc: Yuri Tikhonov Cc: Ilya Yanok Cc: H. 
Peter Anvin Cc: David Woodhouse Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 4 + crypto/async_tx/Kconfig | 5 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_raid6_recov.c | 448 ++++++++++++++++++++++++++++++++++ include/linux/async_tx.h | 8 + 5 files changed, 466 insertions(+) create mode 100644 crypto/async_tx/async_raid6_recov.c (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 0e48e054d69a..ba046b8fa92f 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -67,6 +67,10 @@ xor_val - xor a series of source buffers and set a flag if the pq - generate the p+q (raid6 syndrome) from a series of source buffers pq_val - validate that a p and or q buffer are in sync with a given series of sources +datap - (raid6_datap_recov) recover a raid6 data block and the p block + from the given sources +2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index cb6d7314f198..e5aeb2b79e6f 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -18,3 +18,8 @@ config ASYNC_PQ tristate select ASYNC_CORE +config ASYNC_RAID6_RECOV + tristate + select ASYNC_CORE + select ASYNC_PQ + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 1b9926588259..9a1a76811b80 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQ) += async_pq.o +obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c new file mode 100644 index 000000000000..0c14d48c9896 --- /dev/null +++ b/crypto/async_tx/async_raid6_recov.c @@ -0,0 +1,448 @@ +/* + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. + * Copyright(c) 2009 Intel Corporation + * + * based on raid6recov.c: + * Copyright 2002 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include +#include +#include +#include + +static struct dma_async_tx_descriptor * +async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, + size_t len, struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, srcs, 2, len); + struct dma_device *dma = chan ? 
chan->device : NULL; + const u8 *amul, *bmul; + u8 ax, bx; + u8 *a, *b, *c; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[2]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* run the operation synchronously */ + async_tx_quiesce(&submit->depend_tx); + amul = raid6_gfmul[coef[0]]; + bmul = raid6_gfmul[coef[1]]; + a = page_address(srcs[0]); + b = page_address(srcs[1]); + c = page_address(dest); + + while (len--) { + ax = amul[*a++]; + bx = bmul[*b++]; + *c++ = ax ^ bx; + } + + return NULL; +} + +static struct dma_async_tx_descriptor * +async_mult(struct page *dest, struct page *src, u8 coef, size_t len, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, &src, 1, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *qmul; /* Q multiplier table */ + u8 *d, *s; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[1]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* no channel available, or failed to allocate a descriptor, so + * perform the operation synchronously + */ + async_tx_quiesce(&submit->depend_tx); + qmul = raid6_gfmul[coef]; + d = page_address(dest); + s = page_address(src); + + while (len--) + *d++ = qmul[*s++]; + + return NULL; +} + +static struct dma_async_tx_descriptor * +__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *a, *b; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[4-2]; + q = blocks[4-1]; + + a = blocks[faila]; + b = blocks[failb]; + + /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = p; + srcs[1] = q; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(b, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = p; + srcs[1] = b; + init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(a, srcs, 0, 2, bytes, submit); + + return tx; + +} + +static struct dma_async_tx_descriptor * +__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *g, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void 
*cb_param = submit->cb_param; + void *scribble = submit->scribble; + int uninitialized_var(good); + int i; + + for (i = 0; i < 3; i++) { + if (i == faila || i == failb) + continue; + else { + good = i; + break; + } + } + BUG_ON(i >= 3); + + p = blocks[5-2]; + q = blocks[5-1]; + g = blocks[good]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for delta p and + * delta q + */ + dp = blocks[faila]; + dq = blocks[failb]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(dp, g, 0, 0, bytes, submit); + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +static struct dma_async_tx_descriptor * +__2data_recov_n(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-2] = dp; + dq = blocks[failb]; + blocks[failb] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + + /* Restore pointer table */ + blocks[faila] = dp; + blocks[failb] = dq; + blocks[disks-2] = p; + blocks[disks-1] = q; + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +/** + * 
async_raid6_2data_recov - asynchronously calculate two missing data blocks + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: first failed drive index + * @failb: second failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!submit->scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + switch (disks) { + case 4: + /* dma devices do not uniformly understand a zero source pq + * operation (in contrast to the synchronous case), so + * explicitly handle the 4 disk special case + */ + return __2data_recov_4(bytes, faila, failb, blocks, submit); + case 5: + /* dma devices do not uniformly understand a single + * source pq operation (in contrast to the synchronous + * case), so explicitly handle the 5 disk special case + */ + return __2data_recov_5(bytes, faila, failb, blocks, submit); + default: + return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); + } +} +EXPORT_SYMBOL_GPL(async_raid6_2data_recov); + +/** + * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int disks, size_t bytes, int faila, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dq; + u8 coef; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + struct page *srcs[2]; + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_datap_recov(disks, bytes, faila, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + /* in the 4 disk case we only need to perform a single source + * multiplication + */ + if (disks == 4) { + int good = faila == 0 ? 
1 : 0; + struct page *g = blocks[good]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(p, g, 0, 0, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + } else { + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + } + + /* Restore pointer table */ + blocks[faila] = dq; + blocks[disks-1] = q; + + /* calculate g^{-faila} */ + coef = raid6_gfinv[raid6_gfexp[faila]]; + + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, dq, coef, bytes, submit); + + srcs[0] = p; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(p, srcs, 0, 2, bytes, submit); + + return tx; +} +EXPORT_SYMBOL_GPL(async_raid6_datap_recov); + +MODULE_AUTHOR("Dan Williams "); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index e6ce5f004f98..866e61c4e2e0 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -194,5 +194,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From e500011ffa191d662ac64d4ada6a5187b3180e16 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Aug 2009 13:19:12 -0700 Subject: timers: Drop a function prototype Drop prototype for non-existent next_timer_interrupt() function. Signed-off-by: Randy Dunlap Cc: akpm LKML-Reference: <4A9ADEC0.70306@oracle.com> Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index be62ec2ebea5..a2d1eb6cb3f0 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -173,11 +173,6 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) -/* - * Return when the next timer-wheel timeout occurs (in absolute jiffies), - * locks the timer base: - */ -extern unsigned long next_timer_interrupt(void); /* * Return when the next timer-wheel timeout occurs (in absolute jiffies), * locks the timer base and does the comparison against the given -- cgit v1.2.3 From f380ef86916904e4b79f7bec599deb51057b2d0c Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Wed, 19 Aug 2009 00:56:44 +0200 Subject: drm/crtc_helper: place drm_helper_encoder_in_use() in the header file - The symbol was already exported. 
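A minimal sketch of what this enables (hypothetical driver code, not part of this patch; assumes "drmP.h" and "drm_crtc_helper.h" are included): with the prototype visible, a driver can test encoder usage directly, for example to power down encoders that no CRTC currently drives.

	/* Hypothetical helper: turn off any encoder without an active CRTC.
	 * This mirrors the kind of check drm_helper_disable_unused_functions()
	 * already performs internally.
	 */
	static void example_disable_unused_encoders(struct drm_device *dev)
	{
		struct drm_encoder *encoder;

		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
			struct drm_encoder_helper_funcs *funcs = encoder->helper_private;

			if (!drm_helper_encoder_in_use(encoder) && funcs && funcs->dpms)
				funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
		}
	}
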
Signed-off-by: Maarten Maathuis Signed-off-by: Dave Airlie --- include/drm/drm_crtc_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 6769ff6c1bc0..e44a4f87303c 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -98,6 +98,7 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb); extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc); +extern bool drm_helper_encoder_in_use(struct drm_encoder *encoder); extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode); -- cgit v1.2.3 From 785b93ef8c309730c2de84ce9c229e40e2d01480 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 28 Aug 2009 15:46:53 +1000 Subject: drm/kms: move driver specific fb common code to helper functions (v2) Initially I always meant this code to be shared, but things ran away from me before I got to it. This refactors the i915 and radeon kms fbdev interaction layers out into generic helpers + driver specific pieces. It moves all the panic/sysrq enhancements to the core file, and stores a linked list of kernel fbs. This could possibly be improved to only store the fb which has fbcon on it for panics etc. radeon retains some specific codes used for a big endian workaround. changes: fix oops in v1 fix freeing path for crtc_info Reviewed-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 3 +- drivers/gpu/drm/drm_fb_helper.c | 697 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_dma.c | 3 +- drivers/gpu/drm/i915/intel_display.c | 12 - drivers/gpu/drm/i915/intel_drv.h | 3 - drivers/gpu/drm/i915/intel_fb.c | 737 ++------------------------------ drivers/gpu/drm/radeon/radeon_display.c | 5 +- drivers/gpu/drm/radeon/radeon_fb.c | 670 ++++------------------------- drivers/gpu/drm/radeon/radeon_mode.h | 2 - include/drm/drm_crtc.h | 2 + include/drm/drm_fb_helper.h | 82 ++++ 11 files changed, 907 insertions(+), 1309 deletions(-) create mode 100644 drivers/gpu/drm/drm_fb_helper.c create mode 100644 include/drm/drm_fb_helper.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 5f0aec4f082a..99071684de25 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -11,7 +11,8 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \ drm_crtc.o drm_crtc_helper.o drm_modes.o drm_edid.o \ - drm_info.o drm_debugfs.o drm_encoder_slave.o + drm_info.o drm_debugfs.o drm_encoder_slave.o \ + drm_fb_helper.o drm-$(CONFIG_COMPAT) += drm_ioc32.o diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c new file mode 100644 index 000000000000..d6ffea74a502 --- /dev/null +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -0,0 +1,697 @@ +/* + * Copyright (c) 2006-2009 Red Hat Inc. 
+ * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM framebuffer helper functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + * + * Authors: + * Dave Airlie + * Jesse Barnes + */ +#include <linux/sysrq.h> +#include <linux/fb.h> +#include "drmP.h" +#include "drm_crtc.h" +#include "drm_fb_helper.h" +#include "drm_crtc_helper.h" + +static LIST_HEAD(kernel_fb_helper_list); + +bool drm_fb_helper_force_kernel_mode(void) +{ + int i = 0; + bool ret, error = false; + struct drm_fb_helper *helper; + + if (list_empty(&kernel_fb_helper_list)) + return false; + + list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) { + for (i = 0; i < helper->crtc_count; i++) { + struct drm_mode_set *mode_set = &helper->crtc_info[i].mode_set; + ret = drm_crtc_helper_set_config(mode_set); + if (ret) + error = true; + } + } + return error; +} + +int drm_fb_helper_panic(struct notifier_block *n, unsigned long ununsed, + void *panic_str) +{ + DRM_ERROR("panic occurred, switching back to text console\n"); + return drm_fb_helper_force_kernel_mode(); + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_panic); + +static struct notifier_block paniced = { + .notifier_call = drm_fb_helper_panic, +}; + +/** + * drm_fb_helper_restore - restore the framebuffer console (kernel) config + * + * Restores the kernel's fbcon mode, used for lastclose & panic paths. + */ +void drm_fb_helper_restore(void) +{ + bool ret; + ret = drm_fb_helper_force_kernel_mode(); + if (ret == true) + DRM_ERROR("Failed to restore crtc configuration\n"); +} +EXPORT_SYMBOL(drm_fb_helper_restore); + +static void drm_fb_helper_restore_work_fn(struct work_struct *ignored) +{ + drm_fb_helper_restore(); +} +static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn); + +static void drm_fb_helper_sysrq(int dummy1, struct tty_struct *dummy3) +{ + schedule_work(&drm_fb_helper_restore_work); +} + +static struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { + .handler = drm_fb_helper_sysrq, + .help_msg = "force-fb(V)", + .action_msg = "Restore framebuffer console", +}; + +static void drm_fb_helper_on(struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + int i; + + /* + * For each CRTC in this fb, turn the crtc on then, + * find all associated encoders and turn them on.
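+ * The crtc is enabled before its encoders so the pipe is already + * active when the outputs come back; drm_fb_helper_off() below does + * the opposite, turning encoders off before the crtc.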
+ */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + + mutex_lock(&dev->mode_config.mutex); + crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); + mutex_unlock(&dev->mode_config.mutex); + + /* Found a CRTC on this fb, now find encoders */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->crtc == crtc) { + struct drm_encoder_helper_funcs *encoder_funcs; + + encoder_funcs = encoder->helper_private; + mutex_lock(&dev->mode_config.mutex); + encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); + mutex_unlock(&dev->mode_config.mutex); + } + } + } +} + +static void drm_fb_helper_off(struct fb_info *info, int dpms_mode) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + int i; + + /* + * For each CRTC in this fb, find all associated encoders + * and turn them off, then turn off the CRTC. + */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + + /* Found a CRTC on this fb, now find encoders */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->crtc == crtc) { + struct drm_encoder_helper_funcs *encoder_funcs; + + encoder_funcs = encoder->helper_private; + mutex_lock(&dev->mode_config.mutex); + encoder_funcs->dpms(encoder, dpms_mode); + mutex_unlock(&dev->mode_config.mutex); + } + } + if (dpms_mode == DRM_MODE_DPMS_OFF) { + mutex_lock(&dev->mode_config.mutex); + crtc_funcs->dpms(crtc, dpms_mode); + mutex_unlock(&dev->mode_config.mutex); + } + } +} + +int drm_fb_helper_blank(int blank, struct fb_info *info) +{ + switch (blank) { + case FB_BLANK_UNBLANK: + drm_fb_helper_on(info); + break; + case FB_BLANK_NORMAL: + drm_fb_helper_off(info, DRM_MODE_DPMS_STANDBY); + break; + case FB_BLANK_HSYNC_SUSPEND: + drm_fb_helper_off(info, DRM_MODE_DPMS_STANDBY); + break; + case FB_BLANK_VSYNC_SUSPEND: + drm_fb_helper_off(info, DRM_MODE_DPMS_SUSPEND); + break; + case FB_BLANK_POWERDOWN: + drm_fb_helper_off(info, DRM_MODE_DPMS_OFF); + break; + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_blank); + +static void drm_fb_helper_crtc_free(struct drm_fb_helper *helper) +{ + int i; + + for (i = 0; i < helper->crtc_count; i++) + kfree(helper->crtc_info[i].mode_set.connectors); + kfree(helper->crtc_info); +} + +int drm_fb_helper_init_crtc_count(struct drm_fb_helper *helper, int crtc_count, int max_conn_count) +{ + struct drm_device *dev = helper->dev; + struct drm_crtc *crtc; + int ret = 0; + int i; + + helper->crtc_info = kcalloc(crtc_count, sizeof(struct drm_fb_helper_crtc), GFP_KERNEL); + if (!helper->crtc_info) + return -ENOMEM; + + helper->crtc_count = crtc_count; + + for (i = 0; i < crtc_count; i++) { + helper->crtc_info[i].mode_set.connectors = + kcalloc(max_conn_count, + sizeof(struct drm_connector *), + GFP_KERNEL); + + if (!helper->crtc_info[i].mode_set.connectors) { + ret = -ENOMEM; + goto out_free; + } + helper->crtc_info[i].mode_set.num_connectors = 0; + } + + i = 0; + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + helper->crtc_info[i].crtc_id = crtc->base.id; + helper->crtc_info[i].mode_set.crtc = crtc; + i++; + } + helper->conn_limit = 
max_conn_count; + return 0; +out_free: + drm_fb_helper_crtc_free(helper); + return -ENOMEM; +} +EXPORT_SYMBOL(drm_fb_helper_init_crtc_count); + +int drm_fb_helper_setcolreg(unsigned regno, + unsigned red, + unsigned green, + unsigned blue, + unsigned transp, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + int i; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_framebuffer *fb = fb_helper->fb; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + if (i == fb_helper->crtc_count) + continue; + + if (regno > 255) + return 1; + + if (fb->depth == 8) { + fb_helper->funcs->gamma_set(crtc, red, green, blue, regno); + return 0; + } + + if (regno < 16) { + switch (fb->depth) { + case 15: + fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | + ((green & 0xf800) >> 6) | + ((blue & 0xf800) >> 11); + break; + case 16: + fb->pseudo_palette[regno] = (red & 0xf800) | + ((green & 0xfc00) >> 5) | + ((blue & 0xf800) >> 11); + break; + case 24: + case 32: + fb->pseudo_palette[regno] = + (((red >> 8) & 0xff) << info->var.red.offset) | + (((green >> 8) & 0xff) << info->var.green.offset) | + (((blue >> 8) & 0xff) << info->var.blue.offset); + break; + } + } + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_setcolreg); + +int drm_fb_helper_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_framebuffer *fb = fb_helper->fb; + int depth; + + if (var->pixclock == -1 || !var->pixclock) + return -EINVAL; + + /* Need to resize the fb object !!! */ + if (var->xres > fb->width || var->yres > fb->height) { + DRM_ERROR("Requested width/height is greater than current fb " + "object %dx%d > %dx%d\n", var->xres, var->yres, + fb->width, fb->height); + DRM_ERROR("Need resizing code.\n"); + return -EINVAL; + } + + switch (var->bits_per_pixel) { + case 16: + depth = (var->green.length == 6) ? 16 : 15; + break; + case 32: + depth = (var->transp.length > 0) ? 
32 : 24; + break; + default: + depth = var->bits_per_pixel; + break; + } + + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 15: + var->red.offset = 10; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->transp.length = 1; + var->transp.offset = 15; + break; + case 16: + var->red.offset = 11; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 32: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 8; + var->transp.offset = 24; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_check_var); + +/* this will let fbcon do the mode init */ +int drm_fb_helper_set_par(struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct fb_var_screeninfo *var = &info->var; + struct drm_crtc *crtc; + int ret; + int i; + + if (var->pixclock != -1) { + DRM_ERROR("PIXEL CLCOK SET\n"); + return -EINVAL; + } + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + if (i == fb_helper->crtc_count) + continue; + + if (crtc->fb == fb_helper->crtc_info[i].mode_set.fb) { + mutex_lock(&dev->mode_config.mutex); + ret = crtc->funcs->set_config(&fb_helper->crtc_info->mode_set); + mutex_unlock(&dev->mode_config.mutex); + if (ret) + return ret; + } + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_set_par); + +int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_mode_set *modeset; + struct drm_crtc *crtc; + int ret = 0; + int i; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + + if (i == fb_helper->crtc_count) + continue; + + modeset = &fb_helper->crtc_info[i].mode_set; + + modeset->x = var->xoffset; + modeset->y = var->yoffset; + + if (modeset->num_connectors) { + mutex_lock(&dev->mode_config.mutex); + ret = crtc->funcs->set_config(modeset); + mutex_unlock(&dev->mode_config.mutex); + if (!ret) { + info->var.xoffset = var->xoffset; + info->var.yoffset = var->yoffset; + } + } + } + return ret; +} +EXPORT_SYMBOL(drm_fb_helper_pan_display); + +int drm_fb_helper_single_fb_probe(struct drm_device *dev, + int (*fb_create)(struct drm_device *dev, + uint32_t fb_width, + uint32_t fb_height, + uint32_t surface_width, + uint32_t surface_height, + struct drm_framebuffer **fb_ptr)) +{ + struct drm_crtc *crtc; + struct drm_connector *connector; + unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; + unsigned int surface_width = 0, surface_height = 0; + int new_fb = 0; + int crtc_count = 0; + int ret, i, 
conn_count = 0; + struct fb_info *info; + struct drm_framebuffer *fb; + struct drm_mode_set *modeset = NULL; + struct drm_fb_helper *fb_helper; + + /* first up get a count of crtcs now in use and new min/maxes width/heights */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + if (drm_helper_crtc_in_use(crtc)) { + if (crtc->desired_mode) { + if (crtc->desired_mode->hdisplay < fb_width) + fb_width = crtc->desired_mode->hdisplay; + + if (crtc->desired_mode->vdisplay < fb_height) + fb_height = crtc->desired_mode->vdisplay; + + if (crtc->desired_mode->hdisplay > surface_width) + surface_width = crtc->desired_mode->hdisplay; + + if (crtc->desired_mode->vdisplay > surface_height) + surface_height = crtc->desired_mode->vdisplay; + } + crtc_count++; + } + } + + if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { + /* hmm everyone went away - assume VGA cable just fell out + and will come back later. */ + return 0; + } + + /* do we have an fb already? */ + if (list_empty(&dev->mode_config.fb_kernel_list)) { + ret = (*fb_create)(dev, fb_width, fb_height, surface_width, + surface_height, &fb); + if (ret) + return -EINVAL; + new_fb = 1; + } else { + fb = list_first_entry(&dev->mode_config.fb_kernel_list, + struct drm_framebuffer, filp_head); + + /* if someone hotplugs something bigger than we have already allocated, we are pwned. + As really we can't resize an fbdev that is in the wild currently due to fbdev + not really being designed for the lower layers moving stuff around under it. + - so in the grand style of things - punt. */ + if ((fb->width < surface_width) || + (fb->height < surface_height)) { + DRM_ERROR("Framebuffer not large enough to scale console onto.\n"); + return -EINVAL; + } + } + + info = fb->fbdev; + fb_helper = info->par; + + crtc_count = 0; + /* okay we need to setup new connector sets in the crtcs */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + modeset = &fb_helper->crtc_info[crtc_count].mode_set; + modeset->fb = fb; + conn_count = 0; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (connector->encoder) + if (connector->encoder->crtc == modeset->crtc) { + modeset->connectors[conn_count] = connector; + conn_count++; + if (conn_count > fb_helper->conn_limit) + BUG(); + } + } + + for (i = conn_count; i < fb_helper->conn_limit; i++) + modeset->connectors[i] = NULL; + + modeset->crtc = crtc; + crtc_count++; + + modeset->num_connectors = conn_count; + if (modeset->crtc->desired_mode) { + if (modeset->mode) + drm_mode_destroy(dev, modeset->mode); + modeset->mode = drm_mode_duplicate(dev, + modeset->crtc->desired_mode); + } + } + fb_helper->crtc_count = crtc_count; + fb_helper->fb = fb; + + if (new_fb) { + info->var.pixclock = -1; + if (register_framebuffer(info) < 0) + return -EINVAL; + } else { + drm_fb_helper_set_par(info); + } + printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node, + info->fix.id); + + /* Switch back to kernel console on panic */ + /* multi card linked list maybe */ + if (list_empty(&kernel_fb_helper_list)) { + printk(KERN_INFO "registered panic notifier\n"); + atomic_notifier_chain_register(&panic_notifier_list, + &paniced); + register_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); + } + list_add(&fb_helper->kernel_fb_list, &kernel_fb_helper_list); + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_single_fb_probe); + +void drm_fb_helper_free(struct drm_fb_helper *helper) +{ + list_del(&helper->kernel_fb_list); + if (list_empty(&kernel_fb_helper_list)) { + printk(KERN_INFO 
"unregistered panic notifier\n"); + atomic_notifier_chain_unregister(&panic_notifier_list, + &paniced); + unregister_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); + } + drm_fb_helper_crtc_free(helper); +} +EXPORT_SYMBOL(drm_fb_helper_free); + +void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch) +{ + info->fix.type = FB_TYPE_PACKED_PIXELS; + info->fix.visual = FB_VISUAL_TRUECOLOR; + info->fix.type_aux = 0; + info->fix.xpanstep = 1; /* doing it in hw */ + info->fix.ypanstep = 1; /* doing it in hw */ + info->fix.ywrapstep = 0; + info->fix.accel = FB_ACCEL_I830; + info->fix.type_aux = 0; + + info->fix.line_length = pitch; + return; +} +EXPORT_SYMBOL(drm_fb_helper_fill_fix); + +void drm_fb_helper_fill_var(struct fb_info *info, struct drm_framebuffer *fb, + uint32_t fb_width, uint32_t fb_height) +{ + info->pseudo_palette = fb->pseudo_palette; + info->var.xres_virtual = fb->width; + info->var.yres_virtual = fb->height; + info->var.bits_per_pixel = fb->bits_per_pixel; + info->var.xoffset = 0; + info->var.yoffset = 0; + info->var.activate = FB_ACTIVATE_NOW; + info->var.height = -1; + info->var.width = -1; + + switch (fb->depth) { + case 8: + info->var.red.offset = 0; + info->var.green.offset = 0; + info->var.blue.offset = 0; + info->var.red.length = 8; /* 8bit DAC */ + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 0; + info->var.transp.length = 0; + break; + case 15: + info->var.red.offset = 10; + info->var.green.offset = 5; + info->var.blue.offset = 0; + info->var.red.length = 5; + info->var.green.length = 5; + info->var.blue.length = 5; + info->var.transp.offset = 15; + info->var.transp.length = 1; + break; + case 16: + info->var.red.offset = 11; + info->var.green.offset = 5; + info->var.blue.offset = 0; + info->var.red.length = 5; + info->var.green.length = 6; + info->var.blue.length = 5; + info->var.transp.offset = 0; + break; + case 24: + info->var.red.offset = 16; + info->var.green.offset = 8; + info->var.blue.offset = 0; + info->var.red.length = 8; + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 0; + info->var.transp.length = 0; + break; + case 32: + info->var.red.offset = 16; + info->var.green.offset = 8; + info->var.blue.offset = 0; + info->var.red.length = 8; + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 24; + info->var.transp.length = 8; + break; + default: + break; + } + + info->var.xres = fb_width; + info->var.yres = fb_height; +} +EXPORT_SYMBOL(drm_fb_helper_fill_var); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 544d889b9b16..c628c3671394 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -29,6 +29,7 @@ #include "drmP.h" #include "drm.h" #include "drm_crtc_helper.h" +#include "drm_fb_helper.h" #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" @@ -1347,7 +1348,7 @@ void i915_driver_lastclose(struct drm_device * dev) drm_i915_private_t *dev_priv = dev->dev_private; if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) { - intelfb_restore(); + drm_fb_helper_restore(); return; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d6fce2133413..5fb7a4f4a427 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3060,8 +3060,6 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - if (intel_crtc->mode_set.mode) - 
drm_mode_destroy(crtc->dev, intel_crtc->mode_set.mode); drm_crtc_cleanup(crtc); kfree(intel_crtc); } @@ -3107,16 +3105,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->cursor_addr = 0; intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); - - intel_crtc->mode_set.crtc = &intel_crtc->base; - intel_crtc->mode_set.connectors = (struct drm_connector **)(intel_crtc + 1); - intel_crtc->mode_set.num_connectors = 0; - - if (i915_fbpercrtc) { - - - - } } int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d6f92ea1b553..38910f8f30ed 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -96,9 +96,6 @@ struct intel_crtc { uint32_t cursor_addr; u8 lut_r[256], lut_g[256], lut_b[256]; int dpms_mode; - struct intel_framebuffer *fbdev_fb; - /* a mode_set for fbdev users on this crtc */ - struct drm_mode_set mode_set; }; #define to_intel_crtc(x) container_of(x, struct intel_crtc, base) diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 1d30802e773e..3041530c3673 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -39,339 +39,34 @@ #include "drmP.h" #include "drm.h" #include "drm_crtc.h" +#include "drm_fb_helper.h" #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" struct intelfb_par { - struct drm_device *dev; - struct drm_display_mode *our_mode; + struct drm_fb_helper helper; struct intel_framebuffer *intel_fb; - int crtc_count; - /* crtc currently bound to this */ - uint32_t crtc_ids[2]; + struct drm_display_mode *our_mode; }; -static int intelfb_setcolreg(unsigned regno, unsigned red, unsigned green, - unsigned blue, unsigned transp, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_mode_set *modeset = &intel_crtc->mode_set; - struct drm_framebuffer *fb = modeset->fb; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - - if (regno > 255) - return 1; - - if (fb->depth == 8) { - intel_crtc_fb_gamma_set(crtc, red, green, blue, regno); - return 0; - } - - if (regno < 16) { - switch (fb->depth) { - case 15: - fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | - ((green & 0xf800) >> 6) | - ((blue & 0xf800) >> 11); - break; - case 16: - fb->pseudo_palette[regno] = (red & 0xf800) | - ((green & 0xfc00) >> 5) | - ((blue & 0xf800) >> 11); - break; - case 24: - case 32: - fb->pseudo_palette[regno] = ((red & 0xff00) << 8) | - (green & 0xff00) | - ((blue & 0xff00) >> 8); - break; - } - } - } - return 0; -} - -static int intelfb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct intel_framebuffer *intel_fb = par->intel_fb; - struct drm_framebuffer *fb = &intel_fb->base; - int depth; - - if (var->pixclock == -1 || !var->pixclock) - return -EINVAL; - - /* Need to resize the fb object !!! 
*/ - if (var->xres > fb->width || var->yres > fb->height) { - DRM_ERROR("Requested width/height is greater than current fb object %dx%d > %dx%d\n",var->xres,var->yres,fb->width,fb->height); - DRM_ERROR("Need resizing code.\n"); - return -EINVAL; - } - - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; - default: - return -EINVAL; - } - - return 0; -} - -/* this will let fbcon do the mode init */ -/* FIXME: take mode config lock? */ -static int intelfb_set_par(struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct fb_var_screeninfo *var = &info->var; - int i; - - DRM_DEBUG("%d %d\n", var->xres, var->pixclock); - - if (var->pixclock != -1) { - - DRM_ERROR("PIXEL CLOCK SET\n"); - return -EINVAL; - } else { - struct drm_crtc *crtc; - int ret; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - if (crtc->fb == intel_crtc->mode_set.fb) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(&intel_crtc->mode_set); - mutex_unlock(&dev->mode_config.mutex); - if (ret) - return ret; - } - } - return 0; - } -} - -static int intelfb_pan_display(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_mode_set *modeset; - struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; - int ret = 0; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - intel_crtc = to_intel_crtc(crtc); - modeset = &intel_crtc->mode_set; - - modeset->x = var->xoffset; - modeset->y = var->yoffset; - - if (modeset->num_connectors) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(modeset); - mutex_unlock(&dev->mode_config.mutex); - if (!ret) { - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - } - } - } - - return ret; -} - -static void intelfb_on(struct fb_info *info) -{ - struct intelfb_par *par = info->par; - 
struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - encoder_funcs = encoder->helper_private; - encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); - } - } - } -} - -static void intelfb_off(struct fb_info *info, int dpms_mode) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - encoder_funcs = encoder->helper_private; - encoder_funcs->dpms(encoder, dpms_mode); - } - } - if (dpms_mode == DRM_MODE_DPMS_OFF) - crtc_funcs->dpms(crtc, dpms_mode); - } -} - -static int intelfb_blank(int blank, struct fb_info *info) -{ - switch (blank) { - case FB_BLANK_UNBLANK: - intelfb_on(info); - break; - case FB_BLANK_NORMAL: - intelfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_HSYNC_SUSPEND: - intelfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_VSYNC_SUSPEND: - intelfb_off(info, DRM_MODE_DPMS_SUSPEND); - break; - case FB_BLANK_POWERDOWN: - intelfb_off(info, DRM_MODE_DPMS_OFF); - break; - } - return 0; -} - static struct fb_ops intelfb_ops = { .owner = THIS_MODULE, - .fb_check_var = intelfb_check_var, - .fb_set_par = intelfb_set_par, - .fb_setcolreg = intelfb_setcolreg, + .fb_check_var = drm_fb_helper_check_var, + .fb_set_par = drm_fb_helper_set_par, + .fb_setcolreg = drm_fb_helper_setcolreg, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, - .fb_pan_display = intelfb_pan_display, - .fb_blank = intelfb_blank, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, }; +static struct drm_fb_helper_funcs intel_fb_helper_funcs = { + .gamma_set = intel_crtc_fb_gamma_set, +}; + + /** * Curretly it is assumed that the old framebuffer is reused. 
* @@ -412,25 +107,10 @@ int intelfb_resize(struct drm_device *dev, struct drm_crtc *crtc) } EXPORT_SYMBOL(intelfb_resize); -static struct drm_mode_set kernelfb_mode; - -static int intelfb_panic(struct notifier_block *n, unsigned long ununsed, - void *panic_str) -{ - DRM_ERROR("panic occurred, switching back to text console\n"); - - intelfb_restore(); - return 0; -} - -static struct notifier_block paniced = { - .notifier_call = intelfb_panic, -}; - static int intelfb_create(struct drm_device *dev, uint32_t fb_width, uint32_t fb_height, uint32_t surface_width, uint32_t surface_height, - struct intel_framebuffer **intel_fb_p) + struct drm_framebuffer **fb_p) { struct fb_info *info; struct intelfb_par *par; @@ -479,7 +159,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, list_add(&fb->filp_head, &dev->mode_config.fb_kernel_list); intel_fb = to_intel_framebuffer(fb); - *intel_fb_p = intel_fb; + *fb_p = fb; info = framebuffer_alloc(sizeof(struct intelfb_par), device); if (!info) { @@ -489,21 +169,19 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, par = info->par; + par->helper.funcs = &intel_fb_helper_funcs; + par->helper.dev = dev; + ret = drm_fb_helper_init_crtc_count(&par->helper, 2, + INTELFB_CONN_LIMIT); + if (ret) + goto out_unref; + strcpy(info->fix.id, "inteldrmfb"); - info->fix.type = FB_TYPE_PACKED_PIXELS; - info->fix.visual = FB_VISUAL_TRUECOLOR; - info->fix.type_aux = 0; - info->fix.xpanstep = 1; /* doing it in hw */ - info->fix.ypanstep = 1; /* doing it in hw */ - info->fix.ywrapstep = 0; - info->fix.accel = FB_ACCEL_I830; - info->fix.type_aux = 0; info->flags = FBINFO_DEFAULT; info->fbops = &intelfb_ops; - info->fix.line_length = fb->pitch; /* setup aperture base/size for vesafb takeover */ info->aperture_base = dev->mode_config.fb_base; @@ -527,18 +205,8 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, // memset(info->screen_base, 0, size); - info->pseudo_palette = fb->pseudo_palette; - info->var.xres_virtual = fb->width; - info->var.yres_virtual = fb->height; - info->var.bits_per_pixel = fb->bits_per_pixel; - info->var.xoffset = 0; - info->var.yoffset = 0; - info->var.activate = FB_ACTIVATE_NOW; - info->var.height = -1; - info->var.width = -1; - - info->var.xres = fb_width; - info->var.yres = fb_height; + drm_fb_helper_fill_fix(info, fb->depth); + drm_fb_helper_fill_var(info, fb, fb_width, fb_height); /* FIXME: we really shouldn't expose mmio space at all */ info->fix.mmio_start = pci_resource_start(dev->pdev, mmio_bar); @@ -550,64 +218,9 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, info->pixmap.flags = FB_PIXMAP_SYSTEM; info->pixmap.scan_align = 1; - switch(fb->depth) { - case 8: - info->var.red.offset = 0; - info->var.green.offset = 0; - info->var.blue.offset = 0; - info->var.red.length = 8; /* 8bit DAC */ - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - 
info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; - default: - break; - } - fb->fbdev = info; par->intel_fb = intel_fb; - par->dev = dev; /* To allow resizeing without swapping buffers */ DRM_DEBUG("allocated %dx%d fb: 0x%08x, bo %p\n", intel_fb->base.width, @@ -625,307 +238,12 @@ out: return ret; } -static int intelfb_multi_fb_probe_crtc(struct drm_device *dev, struct drm_crtc *crtc) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_framebuffer *intel_fb; - struct drm_framebuffer *fb; - struct drm_connector *connector; - struct fb_info *info; - struct intelfb_par *par; - struct drm_mode_set *modeset; - unsigned int width, height; - int new_fb = 0; - int ret, i, conn_count; - - if (!drm_helper_crtc_in_use(crtc)) - return 0; - - if (!crtc->desired_mode) - return 0; - - width = crtc->desired_mode->hdisplay; - height = crtc->desired_mode->vdisplay; - - /* is there an fb bound to this crtc already */ - if (!intel_crtc->mode_set.fb) { - ret = intelfb_create(dev, width, height, width, height, &intel_fb); - if (ret) - return -EINVAL; - new_fb = 1; - } else { - fb = intel_crtc->mode_set.fb; - intel_fb = to_intel_framebuffer(fb); - if ((intel_fb->base.width < width) || (intel_fb->base.height < height)) - return -EINVAL; - } - - info = intel_fb->base.fbdev; - par = info->par; - - modeset = &intel_crtc->mode_set; - modeset->fb = &intel_fb->base; - conn_count = 0; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - if (connector->encoder) - if (connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count] = connector; - conn_count++; - if (conn_count > INTELFB_CONN_LIMIT) - BUG(); - } - } - - for (i = conn_count; i < INTELFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - par->crtc_ids[0] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) - drm_mode_destroy(dev, modeset->mode); - modeset->mode = drm_mode_duplicate(dev, - modeset->crtc->desired_mode); - } - - par->crtc_count = 1; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else - intelfb_set_par(info); - - DRM_INFO("fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - kernelfb_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - DRM_DEBUG("registered panic notifier\n"); - - return 0; -} - -static int intelfb_multi_fb_probe(struct drm_device *dev) -{ - - struct drm_crtc *crtc; - int ret = 0; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - ret = intelfb_multi_fb_probe_crtc(dev, crtc); - if (ret) - return ret; - } - return ret; -} - -static int intelfb_single_fb_probe(struct drm_device *dev) -{ - struct drm_crtc *crtc; - struct drm_connector *connector; - unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; - unsigned int surface_width = 0, surface_height = 0; - int new_fb = 0; - int crtc_count = 0; - int ret, i, conn_count = 0; - struct intel_framebuffer *intel_fb; - struct fb_info *info; - struct intelfb_par *par; - struct drm_mode_set *modeset = NULL; 
- - DRM_DEBUG("\n"); - - /* Get a count of crtcs now in use and new min/maxes width/heights */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (!drm_helper_crtc_in_use(crtc)) - continue; - - crtc_count++; - if (!crtc->desired_mode) - continue; - - /* Smallest mode determines console size... */ - if (crtc->desired_mode->hdisplay < fb_width) - fb_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay < fb_height) - fb_height = crtc->desired_mode->vdisplay; - - /* ... but largest for memory allocation dimensions */ - if (crtc->desired_mode->hdisplay > surface_width) - surface_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay > surface_height) - surface_height = crtc->desired_mode->vdisplay; - } - - if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { - /* hmm everyone went away - assume VGA cable just fell out - and will come back later. */ - DRM_DEBUG("no CRTCs available?\n"); - return 0; - } - -//fail - /* Find the fb for our new config */ - if (list_empty(&dev->mode_config.fb_kernel_list)) { - DRM_DEBUG("creating new fb (console size %dx%d, " - "buffer size %dx%d)\n", fb_width, fb_height, - surface_width, surface_height); - ret = intelfb_create(dev, fb_width, fb_height, surface_width, - surface_height, &intel_fb); - if (ret) - return -EINVAL; - new_fb = 1; - } else { - struct drm_framebuffer *fb; - - fb = list_first_entry(&dev->mode_config.fb_kernel_list, - struct drm_framebuffer, filp_head); - intel_fb = to_intel_framebuffer(fb); - - /* if someone hotplugs something bigger than we have already - * allocated, we are pwned. As really we can't resize an - * fbdev that is in the wild currently due to fbdev not really - * being designed for the lower layers moving stuff around - * under it. - * - so in the grand style of things - punt. - */ - if ((fb->width < surface_width) || - (fb->height < surface_height)) { - DRM_ERROR("fb not large enough for console\n"); - return -EINVAL; - } - } -// fail - - info = intel_fb->base.fbdev; - par = info->par; - - crtc_count = 0; - /* - * For each CRTC, set up the connector list for the CRTC's mode - * set configuration. 
- */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - modeset = &intel_crtc->mode_set; - modeset->fb = &intel_fb->base; - conn_count = 0; - list_for_each_entry(connector, &dev->mode_config.connector_list, - head) { - if (!connector->encoder) - continue; - - if(connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count++] = connector; - if (conn_count > INTELFB_CONN_LIMIT) - BUG(); - } - } - - /* Zero out remaining connector pointers */ - for (i = conn_count; i < INTELFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - par->crtc_ids[crtc_count++] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) - drm_mode_destroy(dev, modeset->mode); - modeset->mode = drm_mode_duplicate(dev, - modeset->crtc->desired_mode); - } - } - par->crtc_count = crtc_count; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else - intelfb_set_par(info); - - DRM_INFO("fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - kernelfb_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - DRM_DEBUG("registered panic notifier\n"); - - return 0; -} - -/** - * intelfb_restore - restore the framebuffer console (kernel) config - * - * Restore's the kernel's fbcon mode, used for lastclose & panic paths. - */ -void intelfb_restore(void) -{ - int ret; - if ((ret = drm_crtc_helper_set_config(&kernelfb_mode)) != 0) { - DRM_ERROR("Failed to restore crtc configuration: %d\n", - ret); - } -} - -static void intelfb_restore_work_fn(struct work_struct *ignored) -{ - intelfb_restore(); -} -static DECLARE_WORK(intelfb_restore_work, intelfb_restore_work_fn); - -static void intelfb_sysrq(int dummy1, struct tty_struct *dummy3) -{ - schedule_work(&intelfb_restore_work); -} - -static struct sysrq_key_op sysrq_intelfb_restore_op = { - .handler = intelfb_sysrq, - .help_msg = "force-fb(V)", - .action_msg = "Restore framebuffer console", -}; - int intelfb_probe(struct drm_device *dev) { int ret; DRM_DEBUG("\n"); - - /* something has changed in the lower levels of hell - deal with it - here */ - - /* two modes : a) 1 fb to rule all crtcs. - b) one fb per crtc. - two actions 1) new connected device - 2) device removed. - case a/1 : if the fb surface isn't big enough - resize the surface fb. - if the fb size isn't big enough - resize fb into surface. - if everything big enough configure the new crtc/etc. - case a/2 : undo the configuration - possibly resize down the fb to fit the new configuration. - case b/1 : see if it is on a new crtc - setup a new fb and add it. - case b/2 : teardown the new fb. 
- */ - - /* mode a first */ - /* search for an fb */ - if (i915_fbpercrtc == 1) { - ret = intelfb_multi_fb_probe(dev); - } else { - ret = intelfb_single_fb_probe(dev); - } - - register_sysrq_key('v', &sysrq_intelfb_restore_op); - + ret = drm_fb_helper_single_fb_probe(dev, intelfb_create); return ret; } EXPORT_SYMBOL(intelfb_probe); @@ -940,13 +258,14 @@ int intelfb_remove(struct drm_device *dev, struct drm_framebuffer *fb) info = fb->fbdev; if (info) { + struct intelfb_par *par = info->par; unregister_framebuffer(info); iounmap(info->screen_base); + if (info->par) + drm_fb_helper_free(&par->helper); framebuffer_release(info); } - atomic_notifier_chain_unregister(&panic_notifier_list, &paniced); - memset(&kernelfb_mode, 0, sizeof(struct drm_mode_set)); return 0; } EXPORT_SYMBOL(intelfb_remove); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index a8fa1bb84cf7..af035605d147 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -158,9 +158,6 @@ static void radeon_crtc_destroy(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - if (radeon_crtc->mode_set.mode) { - drm_mode_destroy(crtc->dev, radeon_crtc->mode_set.mode); - } drm_crtc_cleanup(crtc); kfree(radeon_crtc); } @@ -189,9 +186,11 @@ static void radeon_crtc_init(struct drm_device *dev, int index) radeon_crtc->crtc_id = index; rdev->mode_info.crtcs[index] = radeon_crtc; +#if 0 radeon_crtc->mode_set.crtc = &radeon_crtc->base; radeon_crtc->mode_set.connectors = (struct drm_connector **)(radeon_crtc + 1); radeon_crtc->mode_set.num_connectors = 0; +#endif for (i = 0; i < 256; i++) { radeon_crtc->lut_r[i] = i << 2; diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index ec383edf5f38..ebb58959f418 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -28,15 +28,7 @@ */ #include -#include -#include -#include -#include -#include -#include -#include #include -#include #include "drmP.h" #include "drm.h" @@ -45,375 +37,86 @@ #include "radeon_drm.h" #include "radeon.h" +#include "drm_fb_helper.h" + struct radeon_fb_device { - struct radeon_device *rdev; - struct drm_display_mode *mode; + struct drm_fb_helper helper; struct radeon_framebuffer *rfb; - int crtc_count; - /* crtc currently bound to this */ - uint32_t crtc_ids[2]; + struct radeon_device *rdev; }; -static int radeonfb_setcolreg(unsigned regno, - unsigned red, - unsigned green, - unsigned blue, - unsigned transp, - struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct drm_mode_set *modeset = &radeon_crtc->mode_set; - struct drm_framebuffer *fb = modeset->fb; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - if (i == rfbdev->crtc_count) { - continue; - } - if (regno > 255) { - return 1; - } - if (fb->depth == 8) { - radeon_crtc_fb_gamma_set(crtc, red, green, blue, regno); - return 0; - } - - if (regno < 16) { - switch (fb->depth) { - case 15: - fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | - ((green & 0xf800) >> 6) | - ((blue & 0xf800) >> 11); - break; - case 16: - fb->pseudo_palette[regno] = (red & 0xf800) | - ((green & 0xfc00) >> 5) | - ((blue & 0xf800) >> 11); - break; - case 24: - case 32: - 
fb->pseudo_palette[regno] = - (((red >> 8) & 0xff) << info->var.red.offset) | - (((green >> 8) & 0xff) << info->var.green.offset) | - (((blue >> 8) & 0xff) << info->var.blue.offset); - break; - } - } - } - return 0; -} - -static int radeonfb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) +static int radeon_fb_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) { - struct radeon_fb_device *rfbdev = info->par; - struct radeon_framebuffer *rfb = rfbdev->rfb; - struct drm_framebuffer *fb = &rfb->base; - int depth; - - if (var->pixclock == -1 || !var->pixclock) { - return -EINVAL; - } - /* Need to resize the fb object !!! */ - if (var->xres > fb->width || var->yres > fb->height) { - DRM_ERROR("Requested width/height is greater than current fb " - "object %dx%d > %dx%d\n", var->xres, var->yres, - fb->width, fb->height); - DRM_ERROR("Need resizing code.\n"); - return -EINVAL; - } - - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; -#ifdef __LITTLE_ENDIAN - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; -#else - case 24: - var->red.offset = 8; - var->green.offset = 16; - var->blue.offset = 24; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 8; - var->green.offset = 16; - var->blue.offset = 24; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 0; - break; -#endif - default: - return -EINVAL; - } - return 0; -} - -/* this will let fbcon do the mode init */ -static int radeonfb_set_par(struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct fb_var_screeninfo *var = &info->var; - struct drm_crtc *crtc; int ret; - int i; - - if (var->pixclock != -1) { - DRM_ERROR("PIXEL CLCOK SET\n"); - return -EINVAL; - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - if (i == rfbdev->crtc_count) { - continue; - } - if (crtc->fb == radeon_crtc->mode_set.fb) { - mutex_lock(&dev->mode_config.mutex); - ret = 
crtc->funcs->set_config(&radeon_crtc->mode_set); - mutex_unlock(&dev->mode_config.mutex); - if (ret) { - return ret; - } - } - } - return 0; -} - -static int radeonfb_pan_display(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_mode_set *modeset; - struct drm_crtc *crtc; - struct radeon_crtc *radeon_crtc; - int ret = 0; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - if (i == rfbdev->crtc_count) { - continue; - } - - radeon_crtc = to_radeon_crtc(crtc); - modeset = &radeon_crtc->mode_set; - - modeset->x = var->xoffset; - modeset->y = var->yoffset; - - if (modeset->num_connectors) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(modeset); - mutex_unlock(&dev->mode_config.mutex); - if (!ret) { - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - } + ret = drm_fb_helper_check_var(var, info); + if (ret) + return ret; + + /* big endian override for radeon endian workaround */ +#ifdef __BIG_ENDIAN + { + int depth; + switch (var->bits_per_pixel) { + case 16: + depth = (var->green.length == 6) ? 16 : 15; + break; + case 32: + depth = (var->transp.length > 0) ? 32 : 24; + break; + default: + depth = var->bits_per_pixel; + break; } - } - return ret; -} - -static void radeonfb_on(struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - mutex_lock(&dev->mode_config.mutex); - crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); - mutex_unlock(&dev->mode_config.mutex); - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - - encoder_funcs = encoder->helper_private; - mutex_lock(&dev->mode_config.mutex); - encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); - mutex_unlock(&dev->mode_config.mutex); - } - } - } -} - -static void radeonfb_off(struct fb_info *info, int dpms_mode) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. 
- */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - - encoder_funcs = encoder->helper_private; - mutex_lock(&dev->mode_config.mutex); - encoder_funcs->dpms(encoder, dpms_mode); - mutex_unlock(&dev->mode_config.mutex); - } - } - if (dpms_mode == DRM_MODE_DPMS_OFF) { - mutex_lock(&dev->mode_config.mutex); - crtc_funcs->dpms(crtc, dpms_mode); - mutex_unlock(&dev->mode_config.mutex); + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 8; + var->green.offset = 16; + var->blue.offset = 24; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 32: + var->red.offset = 8; + var->green.offset = 16; + var->blue.offset = 24; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 8; + var->transp.offset = 0; + break; + default: + return -EINVAL; } } -} - -int radeonfb_blank(int blank, struct fb_info *info) -{ - switch (blank) { - case FB_BLANK_UNBLANK: - radeonfb_on(info); - break; - case FB_BLANK_NORMAL: - radeonfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_HSYNC_SUSPEND: - radeonfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_VSYNC_SUSPEND: - radeonfb_off(info, DRM_MODE_DPMS_SUSPEND); - break; - case FB_BLANK_POWERDOWN: - radeonfb_off(info, DRM_MODE_DPMS_OFF); - break; - } +#endif return 0; } static struct fb_ops radeonfb_ops = { .owner = THIS_MODULE, - .fb_check_var = radeonfb_check_var, - .fb_set_par = radeonfb_set_par, - .fb_setcolreg = radeonfb_setcolreg, + .fb_check_var = radeon_fb_check_var, + .fb_set_par = drm_fb_helper_set_par, + .fb_setcolreg = drm_fb_helper_setcolreg, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, - .fb_pan_display = radeonfb_pan_display, - .fb_blank = radeonfb_blank, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, }; /** @@ -456,21 +159,6 @@ int radeonfb_resize(struct drm_device *dev, struct drm_crtc *crtc) } EXPORT_SYMBOL(radeonfb_resize); -static struct drm_mode_set panic_mode; - -int radeonfb_panic(struct notifier_block *n, unsigned long ununsed, - void *panic_str) -{ - DRM_ERROR("panic occurred, switching back to text console\n"); - drm_crtc_helper_set_config(&panic_mode); - return 0; -} -EXPORT_SYMBOL(radeonfb_panic); - -static struct notifier_block paniced = { - .notifier_call = radeonfb_panic, -}; - static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled) { int aligned = width; @@ -495,11 +183,16 @@ static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bo return aligned; } -int radeonfb_create(struct radeon_device *rdev, +static struct drm_fb_helper_funcs radeon_fb_helper_funcs = { + .gamma_set = radeon_crtc_fb_gamma_set, +}; + +int radeonfb_create(struct drm_device *dev, uint32_t fb_width, uint32_t fb_height, uint32_t surface_width, uint32_t surface_height, - struct 
radeon_framebuffer **rfb_p) + struct drm_framebuffer **fb_p) { + struct radeon_device *rdev = dev->dev_private; struct fb_info *info; struct radeon_fb_device *rfbdev; struct drm_framebuffer *fb = NULL; @@ -554,8 +247,8 @@ int radeonfb_create(struct radeon_device *rdev, list_add(&fb->filp_head, &rdev->ddev->mode_config.fb_kernel_list); + *fb_p = fb; rfb = to_radeon_framebuffer(fb); - *rfb_p = rfb; rdev->fbdev_rfb = rfb; rdev->fbdev_robj = robj; @@ -564,7 +257,14 @@ int radeonfb_create(struct radeon_device *rdev, ret = -ENOMEM; goto out_unref; } + rfbdev = info->par; + rfbdev->helper.funcs = &radeon_fb_helper_funcs; + rfbdev->helper.dev = dev; + ret = drm_fb_helper_init_crtc_count(&rfbdev->helper, 2, + RADEONFB_CONN_LIMIT); + if (ret) + goto out_unref; if (fb_tiled) radeon_object_check_tiling(robj, 0, 0); @@ -577,33 +277,19 @@ int radeonfb_create(struct radeon_device *rdev, memset_io(fbptr, 0, aligned_size); strcpy(info->fix.id, "radeondrmfb"); - info->fix.type = FB_TYPE_PACKED_PIXELS; - info->fix.visual = FB_VISUAL_TRUECOLOR; - info->fix.type_aux = 0; - info->fix.xpanstep = 1; /* doing it in hw */ - info->fix.ypanstep = 1; /* doing it in hw */ - info->fix.ywrapstep = 0; - info->fix.accel = FB_ACCEL_NONE; - info->fix.type_aux = 0; + + drm_fb_helper_fill_fix(info, fb->pitch); + info->flags = FBINFO_DEFAULT; info->fbops = &radeonfb_ops; - info->fix.line_length = fb->pitch; + tmp = fb_gpuaddr - rdev->mc.vram_location; info->fix.smem_start = rdev->mc.aper_base + tmp; info->fix.smem_len = size; info->screen_base = fbptr; info->screen_size = size; - info->pseudo_palette = fb->pseudo_palette; - info->var.xres_virtual = fb->width; - info->var.yres_virtual = fb->height; - info->var.bits_per_pixel = fb->bits_per_pixel; - info->var.xoffset = 0; - info->var.yoffset = 0; - info->var.activate = FB_ACTIVATE_NOW; - info->var.height = -1; - info->var.width = -1; - info->var.xres = fb_width; - info->var.yres = fb_height; + + drm_fb_helper_fill_var(info, fb, fb_width, fb_height); /* setup aperture base/size for vesafb takeover */ info->aperture_base = rdev->ddev->mode_config.fb_base; @@ -626,6 +312,9 @@ int radeonfb_create(struct radeon_device *rdev, DRM_INFO("fb depth is %d\n", fb->depth); DRM_INFO(" pitch is %d\n", fb->pitch); +#ifdef __BIG_ENDIAN + /* fill var sets defaults for this stuff - override + on big endian */ switch (fb->depth) { case 8: info->var.red.offset = 0; @@ -637,47 +326,6 @@ int radeonfb_create(struct radeon_device *rdev, info->var.transp.offset = 0; info->var.transp.length = 0; break; -#ifdef __LITTLE_ENDIAN - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; 
-#else case 24: info->var.red.offset = 8; info->var.green.offset = 16; @@ -699,9 +347,9 @@ int radeonfb_create(struct radeon_device *rdev, info->var.transp.length = 8; break; default: -#endif break; } +#endif fb->fbdev = info; rfbdev->rfb = rfb; @@ -726,145 +374,10 @@ out: return ret; } -static int radeonfb_single_fb_probe(struct radeon_device *rdev) -{ - struct drm_crtc *crtc; - struct drm_connector *connector; - unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; - unsigned int surface_width = 0, surface_height = 0; - int new_fb = 0; - int crtc_count = 0; - int ret, i, conn_count = 0; - struct radeon_framebuffer *rfb; - struct fb_info *info; - struct radeon_fb_device *rfbdev; - struct drm_mode_set *modeset = NULL; - - /* first up get a count of crtcs now in use and new min/maxes width/heights */ - list_for_each_entry(crtc, &rdev->ddev->mode_config.crtc_list, head) { - if (drm_helper_crtc_in_use(crtc)) { - if (crtc->desired_mode) { - if (crtc->desired_mode->hdisplay < fb_width) - fb_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay < fb_height) - fb_height = crtc->desired_mode->vdisplay; - - if (crtc->desired_mode->hdisplay > surface_width) - surface_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay > surface_height) - surface_height = crtc->desired_mode->vdisplay; - } - crtc_count++; - } - } - - if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { - /* hmm everyone went away - assume VGA cable just fell out - and will come back later. */ - return 0; - } - - /* do we have an fb already? */ - if (list_empty(&rdev->ddev->mode_config.fb_kernel_list)) { - /* create an fb if we don't have one */ - ret = radeonfb_create(rdev, fb_width, fb_height, surface_width, surface_height, &rfb); - if (ret) { - return -EINVAL; - } - new_fb = 1; - } else { - struct drm_framebuffer *fb; - fb = list_first_entry(&rdev->ddev->mode_config.fb_kernel_list, struct drm_framebuffer, filp_head); - rfb = to_radeon_framebuffer(fb); - - /* if someone hotplugs something bigger than we have already allocated, we are pwned. - As really we can't resize an fbdev that is in the wild currently due to fbdev - not really being designed for the lower layers moving stuff around under it. - - so in the grand style of things - punt. 
*/ - if ((fb->width < surface_width) || (fb->height < surface_height)) { - DRM_ERROR("Framebuffer not large enough to scale console onto.\n"); - return -EINVAL; - } - } - - info = rfb->base.fbdev; - rdev->fbdev_info = info; - rfbdev = info->par; - - crtc_count = 0; - /* okay we need to setup new connector sets in the crtcs */ - list_for_each_entry(crtc, &rdev->ddev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - modeset = &radeon_crtc->mode_set; - modeset->fb = &rfb->base; - conn_count = 0; - list_for_each_entry(connector, &rdev->ddev->mode_config.connector_list, head) { - if (connector->encoder) - if (connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count] = connector; - conn_count++; - if (conn_count > RADEONFB_CONN_LIMIT) - BUG(); - } - } - - for (i = conn_count; i < RADEONFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - - rfbdev->crtc_ids[crtc_count++] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) { - drm_mode_destroy(rdev->ddev, modeset->mode); - } - modeset->mode = drm_mode_duplicate(rdev->ddev, - modeset->crtc->desired_mode); - } - } - rfbdev->crtc_count = crtc_count; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else { - radeonfb_set_par(info); - } - printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - panic_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - printk(KERN_INFO "registered panic notifier\n"); - - return 0; -} - int radeonfb_probe(struct drm_device *dev) { int ret; - - /* something has changed in the lower levels of hell - deal with it - here */ - - /* two modes : a) 1 fb to rule all crtcs. - b) one fb per crtc. - two actions 1) new connected device - 2) device removed. - case a/1 : if the fb surface isn't big enough - resize the surface fb. - if the fb size isn't big enough - resize fb into surface. - if everything big enough configure the new crtc/etc. - case a/2 : undo the configuration - possibly resize down the fb to fit the new configuration. - case b/1 : see if it is on a new crtc - setup a new fb and add it. - case b/2 : teardown the new fb. 
- */ - ret = radeonfb_single_fb_probe(dev->dev_private); + ret = drm_fb_helper_single_fb_probe(dev, &radeonfb_create); return ret; } EXPORT_SYMBOL(radeonfb_probe); @@ -880,16 +393,17 @@ int radeonfb_remove(struct drm_device *dev, struct drm_framebuffer *fb) } info = fb->fbdev; if (info) { + struct radeon_fb_device *rfbdev = info->par; robj = rfb->obj->driver_private; unregister_framebuffer(info); radeon_object_kunmap(robj); radeon_object_unpin(robj); + drm_fb_helper_free(&rfbdev->helper); framebuffer_release(info); } printk(KERN_INFO "unregistered panic notifier\n"); - atomic_notifier_chain_unregister(&panic_notifier_list, &paniced); - memset(&panic_mode, 0, sizeof(struct drm_mode_set)); + return 0; } EXPORT_SYMBOL(radeonfb_remove); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 3b09a1f2d8f9..20e9509a7130 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -195,8 +195,6 @@ struct radeon_crtc { bool enabled; bool can_tile; uint32_t crtc_offset; - struct radeon_framebuffer *fbdev_fb; - struct drm_mode_set mode_set; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; int cursor_width; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index db92a83f8ca9..b0427a70fcbd 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -259,6 +259,8 @@ struct drm_framebuffer { void *fbdev; u32 pseudo_palette[17]; struct list_head filp_head; + /* if you are using the helper */ + void *helper_private; }; struct drm_property_blob { diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h new file mode 100644 index 000000000000..88fffbdfa26f --- /dev/null +++ b/include/drm/drm_fb_helper.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2006-2009 Red Hat Inc. + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM framebuffer helper functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ * + * Authors: + * Dave Airlie + * Jesse Barnes + */ +#ifndef DRM_FB_HELPER_H +#define DRM_FB_HELPER_H + +struct drm_fb_helper_crtc { + uint32_t crtc_id; + struct drm_mode_set mode_set; +}; + +struct drm_fb_helper_funcs { + void (*gamma_set)(struct drm_crtc *crtc, u16 red, u16 green, + u16 blue, int regno); +}; + +struct drm_fb_helper { + struct drm_framebuffer *fb; + struct drm_device *dev; + struct drm_display_mode *mode; + int crtc_count; + struct drm_fb_helper_crtc *crtc_info; + struct drm_fb_helper_funcs *funcs; + int conn_limit; + struct list_head kernel_fb_list; +}; + +int drm_fb_helper_single_fb_probe(struct drm_device *dev, + int (*fb_create)(struct drm_device *dev, + uint32_t fb_width, + uint32_t fb_height, + uint32_t surface_width, + uint32_t surface_height, + struct drm_framebuffer **fb_ptr)); +int drm_fb_helper_init_crtc_count(struct drm_fb_helper *helper, int crtc_count, + int max_conn); +void drm_fb_helper_free(struct drm_fb_helper *helper); +int drm_fb_helper_blank(int blank, struct fb_info *info); +int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info); +int drm_fb_helper_set_par(struct fb_info *info); +int drm_fb_helper_check_var(struct fb_var_screeninfo *var, + struct fb_info *info); +int drm_fb_helper_setcolreg(unsigned regno, + unsigned red, + unsigned green, + unsigned blue, + unsigned transp, + struct fb_info *info); + +void drm_fb_helper_restore(void); +void drm_fb_helper_fill_var(struct fb_info *info, struct drm_framebuffer *fb, + uint32_t fb_width, uint32_t fb_height); +void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch); + +#endif -- cgit v1.2.3 From 98a56ab382079f777e261e14512cbd4fb2107af4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Sep 2009 08:48:28 -0400 Subject: ext4: Fix spelling typo in the trace format for trace_ext4_da_writepages() Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 8d433c4e3709..15051d2d1219 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -243,7 +243,7 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic = wbc->range_cyclic; ), - TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", + TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, -- cgit v1.2.3 From b3a3ca8ca0c3c29abc5b2bfe94bb14f3f4590df9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Aug 2009 23:13:11 -0400 Subject: ext4: Add new tracepoint: trace_ext4_da_write_pages() Add a new tracepoint which shows the pages that will be written using write_cache_pages() by ext4_da_writepages(). 
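For illustration, once the event exists it can be driven from userspace
through the standard tracing debugfs interface. A minimal, hypothetical
consumer (assuming debugfs is mounted at /sys/kernel/debug; the paths
follow the usual events/<subsystem>/<event>/ layout, none of which is
added by this patch) might look like:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const char *en = "/sys/kernel/debug/tracing/events/"
				 "ext4/ext4_da_write_pages/enable";
		FILE *f = fopen(en, "w");
		char line[512];

		if (!f) {
			perror("enable");
			return 1;
		}
		fputs("1\n", f);	/* turn the event on */
		fclose(f);

		/* stream matching records as they are emitted */
		f = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
		if (!f) {
			perror("trace_pipe");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			if (strstr(line, "ext4_da_write_pages"))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}

Each record then reports the extent state (b_blocknr, b_size, b_state)
and the page range that the writeback pass is about to push.
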
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 +++++++++++++++ fs/ext4/inode.c | 13 +------------ include/trace/events/ext4.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 41a76e163b99..81014f4ed22d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -113,6 +113,21 @@ struct ext4_allocation_request { unsigned int flags; }; +/* + * For delayed allocation tracking + */ +struct mpage_da_data { + struct inode *inode; + sector_t b_blocknr; /* start block number of extent */ + size_t b_size; /* size of extent */ + unsigned long b_state; /* state of the extent */ + unsigned long first_page, next_page; /* extent of pages */ + struct writeback_control *wbc; + int io_done; + int pages_written; + int retval; +}; + /* * Special inodes numbers */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ff659e757578..17802a96af9f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1875,18 +1875,6 @@ static void ext4_da_page_release_reservation(struct page *page, * Delayed allocation stuff */ -struct mpage_da_data { - struct inode *inode; - sector_t b_blocknr; /* start block number of extent */ - size_t b_size; /* size of extent */ - unsigned long b_state; /* state of the extent */ - unsigned long first_page, next_page; /* extent of pages */ - struct writeback_control *wbc; - int io_done; - int pages_written; - int retval; -}; - /* * mpage_da_submit_io - walks through extent of pages and try to write * them with writepage() call back @@ -2863,6 +2851,7 @@ retry: mpd.io_done = 1; ret = MPAGE_DA_EXTENT_TAIL; } + trace_ext4_da_write_pages(inode, &mpd); wbc->nr_to_write -= mpd.pages_written; ext4_journal_stop(handle); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 15051d2d1219..dd43399288ea 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -251,6 +251,40 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic) ); +TRACE_EVENT(ext4_da_write_pages, + TP_PROTO(struct inode *inode, struct mpage_da_data *mpd), + + TP_ARGS(inode, mpd), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, b_blocknr ) + __field( __u32, b_size ) + __field( __u32, b_state ) + __field( unsigned long, first_page ) + __field( int, io_done ) + __field( int, pages_written ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->b_blocknr = mpd->b_blocknr; + __entry->b_size = mpd->b_size; + __entry->b_state = mpd->b_state; + __entry->first_page = mpd->first_page; + __entry->io_done = mpd->io_done; + __entry->pages_written = mpd->pages_written; + ), + + TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, + __entry->b_blocknr, __entry->b_size, + __entry->b_state, __entry->first_page, + __entry->io_done, __entry->pages_written) +); + TRACE_EVENT(ext4_da_writepages_result, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int ret, int pages_written), -- cgit v1.2.3 From 8aa84ad8d6c740a04386f599694609ee4998e82e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:05 +0200 Subject: [CPUFREQ] Introduce global, not per core: /sys/devices/system/cpu/cpufreq Currently everything in the cpufreq layer is per core based. This does not reflect reality, for example ondemand on conservative governors have global sysfs variables. 
Introduce a global cpufreq directory and add the kobject to the governor struct, so that governors can easily access it. The directory is initialized in the cpufreq_core_init initcall and thus will always be created if cpufreq is compiled in, even if no cpufreq driver is active later. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 9 ++++++++- include/linux/cpufreq.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bbd5c2164ab6..4da28444b235 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -686,6 +686,9 @@ static struct attribute *default_attrs[] = { NULL }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -1935,7 +1938,11 @@ static int __init cpufreq_core_init(void) per_cpu(policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } + + cpufreq_global_kobject = kobject_create_and_add("cpufreq", + &cpu_sysdev_class.kset.kobj); + BUG_ON(!cpufreq_global_kobject); + return 0; } - core_initcall(cpufreq_core_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 161042746afc..44717eb47639 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,6 +65,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, struct cpufreq_governor; +/* /sys/devices/system/cpu/cpufreq: entry point for global variables */ +extern struct kobject *cpufreq_global_kobject; + #define CPUFREQ_ETERNAL (-1) struct cpufreq_cpuinfo { unsigned int max_freq; @@ -274,6 +277,13 @@ struct freq_attr { ssize_t (*store)(struct cpufreq_policy *, const char *, size_t count); }; +struct global_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *a, struct attribute *b, + const char *c, size_t count); +}; /********************************************************************* * CPUFREQ 2.6. INTERFACE * -- cgit v1.2.3 From fa8a123855e20068204982596b8fafceb1a67f0b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 26 Aug 2009 13:13:37 +1000 Subject: drm/mm: add ability to dump mm lists via debugfs This adds code to the drm_mm to talk to debugfs, and adds support to radeon to add the VRAM and GTT mm lists to debugfs. I tested with spinlock debugging and it doesn't give out. Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_mm.c | 21 ++++++++++++++ drivers/gpu/drm/radeon/radeon_ttm.c | 56 +++++++++++++++++++++++++++++++++++++ include/drm/drm_mm.h | 4 +++ 3 files changed, 81 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 3e47869d6dae..c861d80fd779 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -44,6 +44,7 @@ #include "drmP.h" #include "drm_mm.h" #include +#include #define MM_UNUSED_TARGET 4 @@ -370,3 +371,23 @@ void drm_mm_takedown(struct drm_mm * mm) BUG_ON(mm->num_unused != 0); } EXPORT_SYMBOL(drm_mm_takedown); + +#if defined(CONFIG_DEBUG_FS) +int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm) +{ + struct drm_mm_node *entry; + int total_used = 0, total_free = 0, total = 0; + + list_for_each_entry(entry, &mm->ml_entry, ml_entry) { + seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: %s\n", entry->start, entry->start + entry->size, entry->size, entry->free ? 
"free" : "used"); + total += entry->size; + if (entry->free) + total_free += entry->size; + else + total_used += entry->size; + } + seq_printf(m, "total: %d, used %d free %d\n", total, total_free, total_used); + return 0; +} +EXPORT_SYMBOL(drm_mm_dump_table); +#endif diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 0a85e7b5d592..dc7a44274ea8 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -35,11 +35,14 @@ #include #include #include +#include #include "radeon_reg.h" #include "radeon.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) +static int radeon_ttm_debugfs_init(struct radeon_device *rdev); + static struct radeon_device *radeon_get_rdev(struct ttm_bo_device *bdev) { struct radeon_mman *mman; @@ -504,6 +507,12 @@ int radeon_ttm_init(struct radeon_device *rdev) if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; } + + r = radeon_ttm_debugfs_init(rdev); + if (r) { + DRM_ERROR("Failed to init debugfs\n"); + return r; + } return 0; } @@ -678,3 +687,50 @@ struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev) gtt->bound = false; return >t->backend; } + +#define RADEON_DEBUGFS_MEM_TYPES 2 + +static struct drm_info_list radeon_mem_types_list[RADEON_DEBUGFS_MEM_TYPES]; +static char radeon_mem_types_names[RADEON_DEBUGFS_MEM_TYPES][32]; + +#if defined(CONFIG_DEBUG_FS) +static int radeon_mm_dump_table(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_mm *mm = (struct drm_mm *)node->info_ent->data; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + int ret; + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); + ret = drm_mm_dump_table(m, mm); + spin_unlock(&glob->lru_lock); + return ret; +} +#endif + +static int radeon_ttm_debugfs_init(struct radeon_device *rdev) +{ + unsigned i; + +#if defined(CONFIG_DEBUG_FS) + for (i = 0; i < RADEON_DEBUGFS_MEM_TYPES; i++) { + if (i == 0) + sprintf(radeon_mem_types_names[i], "radeon_vram_mm"); + else + sprintf(radeon_mem_types_names[i], "radeon_gtt_mm"); + radeon_mem_types_list[i].name = radeon_mem_types_names[i]; + radeon_mem_types_list[i].show = &radeon_mm_dump_table; + radeon_mem_types_list[i].driver_features = 0; + if (i == 0) + radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_VRAM].manager; + else + radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_TT].manager; + + } + return radeon_debugfs_add_files(rdev, radeon_mem_types_list, RADEON_DEBUGFS_MEM_TYPES); + +#endif + return 0; +} diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index f8332073d277..bc5a87e8aeea 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -96,4 +96,8 @@ static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block) return block->mm; } +#ifdef CONFIG_DEBUG_FS +int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm); +#endif + #endif -- cgit v1.2.3 From a3a0544b2c84e1d7a2022b558ecf66d8c6a8dd93 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 31 Aug 2009 15:16:30 +1000 Subject: drm/kms: add explicit encoder disable function and detach harder. For shared tv-out and VGA encoders, we really need to know if the encoder is just being switched off temporarily in blanking or if we are really disabling it hard. Also we need to try harder to disconnect encoders from unused connectors so we can share more efficently. 
(shared encoders stuff is coming in radeon tv-out support) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 24 ++++++++++++++++++++---- include/drm/drm_crtc_helper.h | 2 ++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 205349ea1075..eea5e6c4099c 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -260,13 +260,27 @@ EXPORT_SYMBOL(drm_helper_crtc_in_use); void drm_helper_disable_unused_functions(struct drm_device *dev) { struct drm_encoder *encoder; + struct drm_connector *connector; struct drm_encoder_helper_funcs *encoder_funcs; struct drm_crtc *crtc; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (!connector->encoder) + continue; + if (connector->status == connector_status_disconnected) + connector->encoder = NULL; + } + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { encoder_funcs = encoder->helper_private; - if (!drm_helper_encoder_in_use(encoder)) - (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); + if (!drm_helper_encoder_in_use(encoder)) { + if (encoder_funcs->disable) + (*encoder_funcs->disable)(encoder); + else + (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); + } + /* disconnector encoder from any connector */ + encoder->crtc = NULL; } list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -411,7 +425,7 @@ static int drm_pick_crtcs(struct drm_device *dev, c = 0; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if ((connector->encoder->possible_crtcs & (1 << c)) == 0) { + if ((encoder->possible_crtcs & (1 << c)) == 0) { c++; continue; } @@ -496,8 +510,10 @@ static void drm_setup_crtcs(struct drm_device *dev) mode->name, crtc->base.id); crtc->desired_mode = mode; connector->encoder->crtc = crtc; - } else + } else { connector->encoder->crtc = NULL; + connector->encoder = NULL; + } i++; } diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index e44a4f87303c..4c8dacaf4f58 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -79,6 +79,8 @@ struct drm_encoder_helper_funcs { /* detect for DAC style encoders */ enum drm_connector_status (*detect)(struct drm_encoder *encoder, struct drm_connector *connector); + /* disable encoder when not in use - more explicit than dpms off */ + void (*disable)(struct drm_encoder *encoder); }; struct drm_connector_helper_funcs { -- cgit v1.2.3 From 6d703a81ad5fdd102334751ddacb053ecc6ff046 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 1 Sep 2009 17:52:57 -0700 Subject: ide: convert to ->proc_fops ->read_proc, ->write_proc are going away, ->proc_fops should be used instead. The only tricky place is IDENTIFY handling: if for some reason taskfile_lib_get_identify() fails, buffer _is_ changed and at least first byte is overwritten. Emulate old behaviour with returning that first byte to userspace and reporting length=1 despite overall -E. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- drivers/ide/ide-cd.c | 28 +++- drivers/ide/ide-disk_proc.c | 129 +++++++++++------ drivers/ide/ide-floppy_proc.c | 30 ++-- drivers/ide/ide-proc.c | 330 +++++++++++++++++++++++++++--------------- drivers/ide/ide-tape.c | 31 ++-- include/linux/ide.h | 24 +-- 6 files changed, 365 insertions(+), 207 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index ad0ab0c0a493..b79ca419d8d9 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1389,19 +1390,30 @@ static sector_t ide_cdrom_capacity(ide_drive_t *drive) return capacity * sectors_per_frame; } -static int proc_idecd_read_capacity(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int idecd_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = data; - int len; + ide_drive_t *drive = m->private; - len = sprintf(page, "%llu\n", (long long)ide_cdrom_capacity(drive)); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)ide_cdrom_capacity(drive)); + return 0; +} + +static int idecd_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idecd_capacity_proc_show, PDE(inode)->data); } +static const struct file_operations idecd_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idecd_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ide_proc_entry_t idecd_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_idecd_read_capacity, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &idecd_capacity_proc_fops }, + {} }; static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive) diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c index 19f263bf0a9e..60b0590ccc9c 100644 --- a/drivers/ide/ide-disk_proc.c +++ b/drivers/ide/ide-disk_proc.c @@ -1,5 +1,6 @@ #include #include +#include #include "ide-disk.h" @@ -37,77 +38,117 @@ static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd) return ide_raw_taskfile(drive, &cmd, buf, 1); } -static int proc_idedisk_read_cache - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idedisk_cache_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len; + ide_drive_t *drive = (ide_drive_t *) m->private; if (drive->dev_flags & IDE_DFLAG_ID_READ) - len = sprintf(out, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); + seq_printf(m, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); else - len = sprintf(out, "(none)\n"); + seq_printf(m, "(none)\n"); + return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_cache_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_cache_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_capacity - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations idedisk_cache_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_cache_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int idedisk_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t*drive = (ide_drive_t *)data; - int len; + ide_drive_t*drive = (ide_drive_t *)m->private; - len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive)); + seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive)); + return 0; 
+} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_capacity_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_smart(char *page, char **start, off_t off, - int count, int *eof, void *data, u8 sub_cmd) +static const struct file_operations idedisk_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 sub_cmd) { - ide_drive_t *drive = (ide_drive_t *)data; - int len = 0, i = 0; + u8 *buf; + + buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; (void)smart_enable(drive); - if (get_smart_data(drive, page, sub_cmd) == 0) { - unsigned short *val = (unsigned short *) page; - char *out = (char *)val + SECTOR_SIZE; - - page = out; - do { - out += sprintf(out, "%04x%c", le16_to_cpu(*val), - (++i & 7) ? ' ' : '\n'); - val += 1; - } while (i < SECTOR_SIZE / 2); - len = out - page; + if (get_smart_data(drive, buf, sub_cmd) == 0) { + __le16 *val = (__le16 *)buf; + int i; + + for (i = 0; i < SECTOR_SIZE / 2; i++) { + seq_printf(m, "%04x%c", le16_to_cpu(val[i]), + (i % 8) == 7 ? '\n' : ' '); + } } + kfree(buf); + return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_sv_proc_show(struct seq_file *m, void *v) +{ + return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES); } -static int proc_idedisk_read_sv - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idedisk_sv_proc_open(struct inode *inode, struct file *file) { - return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_VALUES); + return single_open(file, idedisk_sv_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_st - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations idedisk_sv_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_sv_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int idedisk_st_proc_show(struct seq_file *m, void *v) { - return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_THRESHOLDS); + return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS); } +static int idedisk_st_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_st_proc_show, PDE(inode)->data); +} + +static const struct file_operations idedisk_st_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_st_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + ide_proc_entry_t ide_disk_proc[] = { - { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL }, - { "capacity", S_IFREG|S_IRUGO, proc_idedisk_read_capacity, NULL }, - { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, - { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_sv, NULL }, - { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_st, NULL }, - { NULL, 0, NULL, NULL } + { "cache", S_IFREG|S_IRUGO, &idedisk_cache_proc_fops }, + { "capacity", S_IFREG|S_IRUGO, &idedisk_capacity_proc_fops }, + { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + { "smart_values", S_IFREG|S_IRUSR, &idedisk_sv_proc_fops }, + { "smart_thresholds", S_IFREG|S_IRUSR, &idedisk_st_proc_fops }, + {} }; ide_devset_rw_field(bios_cyl, bios_cyl); diff 
--git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c index fcd4d8153df5..d711d9b883de 100644 --- a/drivers/ide/ide-floppy_proc.c +++ b/drivers/ide/ide-floppy_proc.c @@ -1,22 +1,34 @@ #include #include +#include #include "ide-floppy.h" -static int proc_idefloppy_read_capacity(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int idefloppy_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t*drive = (ide_drive_t *)data; - int len; + ide_drive_t*drive = (ide_drive_t *)m->private; - len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive)); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive)); + return 0; } +static int idefloppy_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idefloppy_capacity_proc_show, PDE(inode)->data); +} + +static const struct file_operations idefloppy_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idefloppy_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + ide_proc_entry_t ide_floppy_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_idefloppy_read_capacity, NULL }, - { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &idefloppy_capacity_proc_fops }, + { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + {} }; ide_devset_rw_field(bios_cyl, bios_cyl); diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 021de41655e6..28d09a5d8450 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -30,11 +30,9 @@ static struct proc_dir_entry *proc_ide_root; -static int proc_ide_read_imodel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_imodel_proc_show(struct seq_file *m, void *v) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + ide_hwif_t *hwif = (ide_hwif_t *) m->private; const char *name; switch (hwif->chipset) { @@ -53,63 +51,108 @@ static int proc_ide_read_imodel case ide_acorn: name = "acorn"; break; default: name = "(unknown)"; break; } - len = sprintf(page, "%s\n", name); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%s\n", name); + return 0; } -static int proc_ide_read_mate - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_imodel_proc_open(struct inode *inode, struct file *file) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + return single_open(file, ide_imodel_proc_show, PDE(inode)->data); +} + +static const struct file_operations ide_imodel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_imodel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_mate_proc_show(struct seq_file *m, void *v) +{ + ide_hwif_t *hwif = (ide_hwif_t *) m->private; if (hwif && hwif->mate) - len = sprintf(page, "%s\n", hwif->mate->name); + seq_printf(m, "%s\n", hwif->mate->name); else - len = sprintf(page, "(none)\n"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "(none)\n"); + return 0; +} + +static int ide_mate_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_mate_proc_show, PDE(inode)->data); } -static int proc_ide_read_channel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_mate_proc_fops = { + .owner = THIS_MODULE, + .open = ide_mate_proc_open, + .read = 
seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_channel_proc_show(struct seq_file *m, void *v) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + ide_hwif_t *hwif = (ide_hwif_t *) m->private; - page[0] = hwif->channel ? '1' : '0'; - page[1] = '\n'; - len = 2; - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%c\n", hwif->channel ? '1' : '0'); + return 0; } -static int proc_ide_read_identify - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_channel_proc_open(struct inode *inode, struct file *file) { - ide_drive_t *drive = (ide_drive_t *)data; - int len = 0, i = 0; - int err = 0; + return single_open(file, ide_channel_proc_show, PDE(inode)->data); +} - len = sprintf(page, "\n"); +static const struct file_operations ide_channel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_channel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; - if (drive) { - __le16 *val = (__le16 *)page; +static int ide_identify_proc_show(struct seq_file *m, void *v) +{ + ide_drive_t *drive = (ide_drive_t *)m->private; + u8 *buf; - err = taskfile_lib_get_identify(drive, page); - if (!err) { - char *out = (char *)page + SECTOR_SIZE; + if (!drive) { + seq_putc(m, '\n'); + return 0; + } - page = out; - do { - out += sprintf(out, "%04x%c", - le16_to_cpup(val), (++i & 7) ? ' ' : '\n'); - val += 1; - } while (i < SECTOR_SIZE / 2); - len = out - page; + buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (taskfile_lib_get_identify(drive, buf) == 0) { + __le16 *val = (__le16 *)buf; + int i; + + for (i = 0; i < SECTOR_SIZE / 2; i++) { + seq_printf(m, "%04x%c", le16_to_cpu(val[i]), + (i % 8) == 7 ? '\n' : ' '); } - } - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + } else + seq_putc(m, buf[0]); + kfree(buf); + return 0; +} + +static int ide_identify_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_identify_proc_show, PDE(inode)->data); } +static const struct file_operations ide_identify_proc_fops = { + .owner = THIS_MODULE, + .open = ide_identify_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /** * ide_find_setting - find a specific setting * @st: setting table pointer @@ -240,22 +283,20 @@ static void proc_ide_settings_warn(void) warned = 1; } -static int proc_ide_read_settings - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_settings_proc_show(struct seq_file *m, void *v) { const struct ide_proc_devset *setting, *g, *d; const struct ide_devset *ds; - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len, rc, mul_factor, div_factor; + ide_drive_t *drive = (ide_drive_t *) m->private; + int rc, mul_factor, div_factor; proc_ide_settings_warn(); mutex_lock(&ide_setting_mtx); g = ide_generic_settings; d = drive->settings; - out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); - out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); + seq_printf(m, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); + seq_printf(m, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); while (g->name || (d && d->name)) { /* read settings in the alphabetical order */ if (g->name && d && d->name) { @@ -269,31 +310,35 @@ static int proc_ide_read_settings setting = g++; mul_factor = setting->mulf ? setting->mulf(drive) : 1; div_factor = setting->divf ? 
setting->divf(drive) : 1; - out += sprintf(out, "%-24s", setting->name); + seq_printf(m, "%-24s", setting->name); rc = ide_read_setting(drive, setting); if (rc >= 0) - out += sprintf(out, "%-16d", rc * mul_factor / div_factor); + seq_printf(m, "%-16d", rc * mul_factor / div_factor); else - out += sprintf(out, "%-16s", "write-only"); - out += sprintf(out, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); + seq_printf(m, "%-16s", "write-only"); + seq_printf(m, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); ds = setting->setting; if (ds->get) - out += sprintf(out, "r"); + seq_printf(m, "r"); if (ds->set) - out += sprintf(out, "w"); - out += sprintf(out, "\n"); + seq_printf(m, "w"); + seq_printf(m, "\n"); } - len = out - page; mutex_unlock(&ide_setting_mtx); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + return 0; +} + +static int ide_settings_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_settings_proc_show, PDE(inode)->data); } #define MAX_LEN 30 -static int proc_ide_write_settings(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t ide_settings_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; char name[MAX_LEN + 1]; int for_real = 0, mul_factor, div_factor; unsigned long n; @@ -388,63 +433,104 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, return count; parse_error: free_page((unsigned long)buf); - printk("proc_ide_write_settings(): parse error\n"); + printk("%s(): parse error\n", __func__); return -EINVAL; } -int proc_ide_read_capacity - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_settings_proc_fops = { + .owner = THIS_MODULE, + .open = ide_settings_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ide_settings_proc_write, +}; + +static int ide_capacity_proc_show(struct seq_file *m, void *v) { - int len = sprintf(page, "%llu\n", (long long)0x7fffffff); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)0x7fffffff); + return 0; } -EXPORT_SYMBOL_GPL(proc_ide_read_capacity); +static int ide_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_capacity_proc_show, NULL); +} -int proc_ide_read_geometry - (char *page, char **start, off_t off, int count, int *eof, void *data) +const struct file_operations ide_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = ide_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL_GPL(ide_capacity_proc_fops); + +static int ide_geometry_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len; + ide_drive_t *drive = (ide_drive_t *) m->private; - out += sprintf(out, "physical %d/%d/%d\n", + seq_printf(m, "physical %d/%d/%d\n", drive->cyl, drive->head, drive->sect); - out += sprintf(out, "logical %d/%d/%d\n", + seq_printf(m, "logical %d/%d/%d\n", drive->bios_cyl, drive->bios_head, drive->bios_sect); + return 0; +} - len = out - page; - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int ide_geometry_proc_open(struct 
inode *inode, struct file *file) +{ + return single_open(file, ide_geometry_proc_show, PDE(inode)->data); } -EXPORT_SYMBOL(proc_ide_read_geometry); +const struct file_operations ide_geometry_proc_fops = { + .owner = THIS_MODULE, + .open = ide_geometry_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL(ide_geometry_proc_fops); -static int proc_ide_read_dmodel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_dmodel_proc_show(struct seq_file *seq, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) seq->private; char *m = (char *)&drive->id[ATA_ID_PROD]; - int len; - len = sprintf(page, "%.40s\n", m[0] ? m : "(none)"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(seq, "%.40s\n", m[0] ? m : "(none)"); + return 0; +} + +static int ide_dmodel_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_dmodel_proc_show, PDE(inode)->data); } -static int proc_ide_read_driver - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_dmodel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_dmodel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_driver_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *)data; + ide_drive_t *drive = (ide_drive_t *)m->private; struct device *dev = &drive->gendev; struct ide_driver *ide_drv; - int len; if (dev->driver) { ide_drv = to_ide_driver(dev->driver); - len = sprintf(page, "%s version %s\n", + seq_printf(m, "%s version %s\n", dev->driver->name, ide_drv->version); } else - len = sprintf(page, "ide-default version 0.9.newide\n"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "ide-default version 0.9.newide\n"); + return 0; +} + +static int ide_driver_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_driver_proc_show, PDE(inode)->data); } static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) @@ -474,10 +560,10 @@ static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) return ret; } -static int proc_ide_write_driver - (struct file *file, const char __user *buffer, unsigned long count, void *data) +static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; char name[32]; if (!capable(CAP_SYS_ADMIN)) @@ -492,12 +578,19 @@ static int proc_ide_write_driver return count; } -static int proc_ide_read_media - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_driver_proc_fops = { + .owner = THIS_MODULE, + .open = ide_driver_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ide_driver_proc_write, +}; + +static int ide_media_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) m->private; const char *media; - int len; switch (drive->media) { case ide_disk: media = "disk\n"; break; @@ -507,20 +600,30 @@ static int proc_ide_read_media case ide_optical: media = "optical\n"; break; default: media = "UNKNOWN\n"; break; } - strcpy(page, media); - len = strlen(media); - PROC_IDE_READ_RETURN(page, start, off, count, 
eof, len);
+	seq_puts(m, media);
+	return 0;
+}
+
+static int ide_media_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ide_media_proc_show, PDE(inode)->data);
 }

+static const struct file_operations ide_media_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ide_media_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static ide_proc_entry_t generic_drive_entries[] = {
-	{ "driver",	S_IFREG|S_IRUGO,	 proc_ide_read_driver,
-						 proc_ide_write_driver },
-	{ "identify",	S_IFREG|S_IRUSR,	 proc_ide_read_identify, NULL },
-	{ "media",	S_IFREG|S_IRUGO,	 proc_ide_read_media,	 NULL },
-	{ "model",	S_IFREG|S_IRUGO,	 proc_ide_read_dmodel,	 NULL },
-	{ "settings",	S_IFREG|S_IRUSR|S_IWUSR, proc_ide_read_settings,
-						 proc_ide_write_settings },
-	{ NULL,	0, NULL, NULL }
+	{ "driver",	S_IFREG|S_IRUGO,	 &ide_driver_proc_fops	},
+	{ "identify",	S_IFREG|S_IRUSR,	 &ide_identify_proc_fops},
+	{ "media",	S_IFREG|S_IRUGO,	 &ide_media_proc_fops	},
+	{ "model",	S_IFREG|S_IRUGO,	 &ide_dmodel_proc_fops	},
+	{ "settings",	S_IFREG|S_IRUSR|S_IWUSR, &ide_settings_proc_fops},
+	{}
 };

 static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data)
@@ -530,11 +633,8 @@ static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p
 	if (!dir || !p)
 		return;
 	while (p->name != NULL) {
-		ent = create_proc_entry(p->name, p->mode, dir);
+		ent = proc_create_data(p->name, p->mode, dir, p->proc_fops, data);
 		if (!ent) return;
-		ent->data = data;
-		ent->read_proc = p->read_proc;
-		ent->write_proc = p->write_proc;
 		p++;
 	}
 }
@@ -617,10 +717,10 @@ void ide_proc_unregister_device(ide_drive_t *drive)
 }

 static ide_proc_entry_t hwif_entries[] = {
-	{ "channel",	S_IFREG|S_IRUGO,	proc_ide_read_channel,	NULL },
-	{ "mate",	S_IFREG|S_IRUGO,	proc_ide_read_mate,	NULL },
-	{ "model",	S_IFREG|S_IRUGO,	proc_ide_read_imodel,	NULL },
-	{ NULL,	0, NULL, NULL }
+	{ "channel",	S_IFREG|S_IRUGO,	&ide_channel_proc_fops	},
+	{ "mate",	S_IFREG|S_IRUGO,	&ide_mate_proc_fops	},
+	{ "model",	S_IFREG|S_IRUGO,	&ide_imodel_proc_fops	},
+	{}
 };

 void ide_proc_register_port(ide_hwif_t *hwif)
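The hunk above is the heart of the conversion: each table entry now carries a file_operations pointer, and the per-drive pointer travels through proc_create_data() instead of being poked into the entry after creation. A minimal, self-contained sketch of the same table-driven registration; my_proc_entry and my_add_proc_entries are hypothetical names, not from the patch:

#include <linux/fs.h>
#include <linux/proc_fs.h>

struct my_proc_entry {
	const char			*name;
	mode_t				mode;
	const struct file_operations	*proc_fops;
};

/* walk a {}-terminated table, binding the same 'data' to every entry */
static void my_add_proc_entries(struct proc_dir_entry *dir,
				const struct my_proc_entry *p, void *data)
{
	for (; p->name; p++)
		if (!proc_create_data(p->name, p->mode, dir,
				      p->proc_fops, data))
			return;
}

The zeroed sentinel ({}) works because an empty initializer leaves name NULL, so the loop condition doubles as the terminator test; the old tables needed an explicit { NULL, 0, NULL, NULL } entry.

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 7b2032bc357b..9d6f62baac27 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <linux/seq_file.h>
 #include
 #include
 #include
@@ -1816,22 +1817,32 @@ static void ide_tape_release(struct device *dev)
 }

 #ifdef CONFIG_IDE_PROC_FS
-static int proc_idetape_read_name
-	(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int idetape_name_proc_show(struct seq_file *m, void *v)
 {
-	ide_drive_t	*drive = (ide_drive_t *) data;
+	ide_drive_t	*drive = (ide_drive_t *) m->private;
 	idetape_tape_t	*tape = drive->driver_data;
-	char		*out = page;
-	int		len;

-	len = sprintf(out, "%s\n", tape->name);
-	PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
+	seq_printf(m, "%s\n", tape->name);
+	return 0;
+}
+
+static int idetape_name_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, idetape_name_proc_show, PDE(inode)->data);
 }

+static const struct file_operations idetape_name_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= idetape_name_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static ide_proc_entry_t idetape_proc[] = {
-	{ "capacity",	S_IFREG|S_IRUGO,	proc_ide_read_capacity,	NULL },
-	{ "name",	S_IFREG|S_IRUGO,	proc_idetape_read_name,	NULL },
-	{ NULL, 0, NULL, NULL }
+	{ "capacity",	S_IFREG|S_IRUGO,	&ide_capacity_proc_fops	},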
+	{ "name",	S_IFREG|S_IRUGO,	&idetape_name_proc_fops	},
+	{}
 };

 static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 803c1ae31237..e4135d6e0556 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -919,8 +919,7 @@ __IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL)
 typedef struct {
 	const char	*name;
 	mode_t		mode;
-	read_proc_t	*read_proc;
-	write_proc_t	*write_proc;
+	const struct file_operations *proc_fops;
 } ide_proc_entry_t;

 void proc_ide_create(void);
@@ -932,24 +931,8 @@ void ide_proc_unregister_port(ide_hwif_t *);
 void ide_proc_register_driver(ide_drive_t *, struct ide_driver *);
 void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *);

-read_proc_t proc_ide_read_capacity;
-read_proc_t proc_ide_read_geometry;
-
-/*
- * Standard exit stuff:
- */
-#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) \
-{ \
-	len -= off; \
-	if (len < count) { \
-		*eof = 1; \
-		if (len <= 0) \
-			return 0; \
-	} else \
-		len = count; \
-	*start = page + off; \
-	return len; \
-}
+extern const struct file_operations ide_capacity_proc_fops;
+extern const struct file_operations ide_geometry_proc_fops;
 #else
 static inline void proc_ide_create(void) { ; }
 static inline void proc_ide_destroy(void) { ; }
@@ -961,7 +944,6 @@ static inline void ide_proc_register_driver(ide_drive_t *drive,
 					    struct ide_driver *driver) { ; }
 static inline void ide_proc_unregister_driver(ide_drive_t *drive,
 					      struct ide_driver *driver) { ; }
-#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
 #endif

 enum {
-- cgit v1.2.3
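Every read-side conversion in the patch above follows the same single_open() idiom: a show() callback that prints the whole file into a seq_file, an open() that binds the /proc entry's data pointer, and a shared file_operations table. A minimal, self-contained sketch of that idiom; my_show, my_open, my_proc_fops, and my_value are hypothetical names, not from the patch:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int my_value = 42;

/* the show() callback emits the entire file contents */
static int my_show(struct seq_file *m, void *v)
{
	/* m->private is whatever pointer was handed to single_open() */
	seq_printf(m, "%d\n", *(int *)m->private);
	return 0;
}

static int my_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data is the pointer passed to proc_create_data() */
	return single_open(file, my_show, PDE(inode)->data);
}

static const struct file_operations my_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= my_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/* registration, typically at init time:
 *	proc_create_data("my_entry", S_IFREG | S_IRUGO, parent_dir,
 *			 &my_proc_fops, &my_value);
 */

seq_read/seq_lseek/single_release handle the partial-read bookkeeping that the old read_proc handlers did by hand via the PROC_IDE_READ_RETURN macro deleted above; writable entries such as "settings" and "driver" simply add a conventional ->write method to the same table.

From 69575d388603365f2afbf4166df93152df59b165 Mon Sep 17 00:00:00 2001
From: Shane Wang
Date: Tue, 1 Sep 2009 18:25:07 -0700
Subject: x86, intel_txt: clean up the impact on generic code, unbreak non-x86

Move tboot.h from asm to linux to fix the build errors of the intel_txt
patch on non-x86 platforms. Remove the tboot code from the generic code
in init/main.c and kernel/cpu.c.

Signed-off-by: Shane Wang
Signed-off-by: H.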
Peter Anvin --- arch/x86/Kconfig | 4 + arch/x86/include/asm/tboot.h | 197 ------------------------------------------ arch/x86/kernel/reboot.c | 3 +- arch/x86/kernel/setup.c | 3 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tboot.c | 58 ++++++++++--- drivers/acpi/acpica/hwsleep.c | 2 +- drivers/pci/dmar.c | 2 +- drivers/pci/intel-iommu.c | 2 +- include/linux/tboot.h | 162 ++++++++++++++++++++++++++++++++++ init/main.c | 3 - kernel/cpu.c | 6 +- security/Kconfig | 2 +- 13 files changed, 221 insertions(+), 225 deletions(-) delete mode 100644 arch/x86/include/asm/tboot.h create mode 100644 include/linux/tboot.h (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6b0f8b..b66f2102c35d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -178,6 +178,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_INTEL_TXT + def_bool y + depends on EXPERIMENTAL && DMAR && ACPI + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool diff --git a/arch/x86/include/asm/tboot.h b/arch/x86/include/asm/tboot.h deleted file mode 100644 index b13929d4e5f4..000000000000 --- a/arch/x86/include/asm/tboot.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * tboot.h: shared data structure with tboot and kernel and functions - * used by kernel for runtime support of Intel(R) Trusted - * Execution Technology - * - * Copyright (c) 2006-2009, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
- * - */ - -#ifndef _ASM_TBOOT_H -#define _ASM_TBOOT_H - -#include - -/* these must have the values from 0-5 in this order */ -enum { - TB_SHUTDOWN_REBOOT = 0, - TB_SHUTDOWN_S5, - TB_SHUTDOWN_S4, - TB_SHUTDOWN_S3, - TB_SHUTDOWN_HALT, - TB_SHUTDOWN_WFS -}; - -#ifdef CONFIG_INTEL_TXT - -/* used to communicate between tboot and the launched kernel */ - -#define TB_KEY_SIZE 64 /* 512 bits */ - -#define MAX_TB_MAC_REGIONS 32 - -struct tboot_mac_region { - u64 start; /* must be 64 byte -aligned */ - u32 size; /* must be 64 byte -granular */ -} __packed; - -/* GAS - Generic Address Structure (ACPI 2.0+) */ -struct tboot_acpi_generic_address { - u8 space_id; - u8 bit_width; - u8 bit_offset; - u8 access_width; - u64 address; -} __packed; - -/* - * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec - * (http://www.acpi.info/) - */ -struct tboot_acpi_sleep_info { - struct tboot_acpi_generic_address pm1a_cnt_blk; - struct tboot_acpi_generic_address pm1b_cnt_blk; - struct tboot_acpi_generic_address pm1a_evt_blk; - struct tboot_acpi_generic_address pm1b_evt_blk; - u16 pm1a_cnt_val; - u16 pm1b_cnt_val; - u64 wakeup_vector; - u32 vector_width; - u64 kernel_s3_resume_vector; -} __packed; - -/* - * shared memory page used for communication between tboot and kernel - */ -struct tboot { - /* - * version 3+ fields: - */ - - /* TBOOT_UUID */ - u8 uuid[16]; - - /* version number: 5 is current */ - u32 version; - - /* physical addr of tb_log_t log */ - u32 log_addr; - - /* - * physical addr of entry point for tboot shutdown and - * type of shutdown (TB_SHUTDOWN_*) being requested - */ - u32 shutdown_entry; - u32 shutdown_type; - - /* kernel-specified ACPI info for Sx shutdown */ - struct tboot_acpi_sleep_info acpi_sinfo; - - /* tboot location in memory (physical) */ - u32 tboot_base; - u32 tboot_size; - - /* memory regions (phys addrs) for tboot to MAC on S3 */ - u8 num_mac_regions; - struct tboot_mac_region mac_regions[MAX_TB_MAC_REGIONS]; - - - /* - * version 4+ fields: - */ - - /* symmetric key for use by kernel; will be encrypted on S3 */ - u8 s3_key[TB_KEY_SIZE]; - - - /* - * version 5+ fields: - */ - - /* used to 4byte-align num_in_wfs */ - u8 reserved_align[3]; - - /* number of processors in wait-for-SIPI */ - u32 num_in_wfs; -} __packed; - -/* - * UUID for tboot data struct to facilitate matching - * defined as {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} by tboot, which is - * represented as {} in the char array used here - */ -#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\ - 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8} - -extern struct tboot *tboot; - -static inline int tboot_enabled(void) -{ - return tboot != NULL; -} - -extern void tboot_probe(void); -extern void tboot_create_trampoline(void); -extern void tboot_shutdown(u32 shutdown_type); -extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); -extern int tboot_wait_for_aps(int num_aps); -extern struct acpi_table_header *tboot_get_dmar_table( - struct acpi_table_header *dmar_tbl); -extern int tboot_force_iommu(void); - -#else /* CONFIG_INTEL_TXT */ - -static inline int tboot_enabled(void) -{ - return 0; -} - -static inline void tboot_probe(void) -{ -} - -static inline void tboot_create_trampoline(void) -{ -} - -static inline void tboot_shutdown(u32 shutdown_type) -{ -} - -static inline void tboot_sleep(u8 sleep_state, u32 pm1a_control, - u32 pm1b_control) -{ -} - -static inline int tboot_wait_for_aps(int num_aps) -{ - return 0; -} - -static inline struct acpi_table_header *tboot_get_dmar_table( - 
struct acpi_table_header *dmar_tbl)
-{
-	return dmar_tbl;
-}
-
-static inline int tboot_force_iommu(void)
-{
-	return 0;
-}
-
-#endif /* !CONFIG_INTEL_TXT */
-
-#endif /* _ASM_TBOOT_H */
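The shape of the header deleted above (and recreated as include/linux/tboot.h) is what lets init/main.c and kernel/cpu.c drop their tboot knowledge: every entry point degrades to a static inline no-op when CONFIG_INTEL_TXT is off, so generic callers need no #ifdef guards. A minimal sketch of that stub-header pattern; CONFIG_MY_FEATURE, my_feature_enabled(), and my_feature_shutdown() are hypothetical names, not from the patch:

/* linux/my_feature.h -- illustrative only */
#ifndef _LINUX_MY_FEATURE_H
#define _LINUX_MY_FEATURE_H

#include <linux/types.h>

#ifdef CONFIG_MY_FEATURE

extern int my_feature_enabled(void);
extern void my_feature_shutdown(u32 shutdown_type);

#else /* !CONFIG_MY_FEATURE */

/* no-op stubs: generic code calls these unconditionally and the
 * compiler discards them when the feature is configured out */
static inline int my_feature_enabled(void)
{
	return 0;
}

static inline void my_feature_shutdown(u32 shutdown_type)
{
}

#endif /* CONFIG_MY_FEATURE */

#endif /* _LINUX_MY_FEATURE_H */

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9de01c5d9794..18ce5c04242a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <linux/tboot.h>
 #include
 #include
 #include
@@ -24,8 +25,6 @@
 # include
 #endif

-#include <asm/tboot.h>
-
 /*
  * Power off function, if any
  */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 80d6e9e32483..6ce0d6f38f7f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -66,6 +66,7 @@
 #include
 #include
+#include <linux/tboot.h>
 #include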