From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Apr 2009 14:28:37 -0700 Subject: async_tx: rename zero_sum to val 'zero_sum' does not properly describe the operation of generating parity and checking that it validates against an existing buffer. Change the name of the operation to 'val' (for 'validate'). This is in anticipation of the p+q case where it is a requirement to identify the target parity buffers separately from the source buffers, because the target parity buffers will not have corresponding pq coefficients. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97fa..513150d8c25b 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -117,7 +117,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, dma_async_tx_callback cb_fn, void *cb_fn_param); struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, u32 *result, enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2e2aa3df170c..6768727d00d7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -55,8 +55,8 @@ enum dma_transaction_type { DMA_PQ_XOR, DMA_DUAL_XOR, DMA_PQ_UPDATE, - DMA_ZERO_SUM, - DMA_PQ_ZERO_SUM, + DMA_XOR_VAL, + DMA_PQ_VAL, DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, @@ -214,7 +214,7 @@ struct dma_async_tx_descriptor { * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation - * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -243,7 +243,7 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, u32 *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( -- cgit v1.2.3 From 88ba2aa586c874681c072101287e15d40de7e6e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Apr 2009 16:16:18 -0700 Subject: async_tx: kill ASYNC_TX_DEP_ACK flag In support of inter-channel chaining async_tx utilizes an ack flag to gate whether a dependent operation can be chained to another. While the flag is not set the chain can be considered open for appending. Setting the ack flag closes the chain and flags the descriptor for garbage collection. The ASYNC_TX_DEP_ACK flag essentially means "close the chain after adding this dependency". Since each operation can only have one child the api now implicitly sets the ack flag at dependency submission time. 
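For illustration, a minimal before/after sketch of a memcpy->xor chain under the old and new rules (buffer names and the cb_fn/cb_param callback here are placeholders, not identifiers from the tree; the flag usage mirrors the Documentation example updated by this patch):

	/* before: each link passes ASYNC_TX_DEP_ACK to close its parent */
	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
			  0, NULL, NULL, NULL);
	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
		       tx, cb_fn, cb_param);

	/* after: submitting 'tx' as a dependency acks it implicitly */
	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
			  0, NULL, NULL, NULL);
	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK,
		       tx, cb_fn, cb_param);

The only caller-visible change is the dropped flag; the open/closed bookkeeping for the chain moves into async_tx_submit().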
This removes an unnecessary management burden from clients of the api. [ Impact: clean up and enforce one dependency per operation ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 9 ++++----- crypto/async_tx/async_memcpy.c | 2 +- crypto/async_tx/async_memset.c | 2 +- crypto/async_tx/async_tx.c | 4 ++-- crypto/async_tx/async_xor.c | 6 ++---- drivers/md/raid5.c | 25 +++++++++++-------------- include/linux/async_tx.h | 4 +--- 7 files changed, 22 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 4af12180d191..76feda8541dc 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -80,8 +80,8 @@ acknowledged by the application before the offload engine driver is allowed to recycle (or free) the descriptor. A descriptor can be acked by one of the following methods: 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted -2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent - descriptor of a new operation. +2/ submitting an unacknowledged descriptor as a dependency to another + async_tx call will implicitly set the acknowledged state. 3/ calling async_tx_ack() on the descriptor. 3.4 When does the operation execute? @@ -136,10 +136,9 @@ int run_xor_copy_xor(struct page **xor_srcs, tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL); tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, + ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, complete_xor_copy_xor, NULL); async_tx_issue_pending_all(); diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index ddccfb01c416..7117ec6f1b74 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -35,7 +35,7 @@ * @src: src page * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, + * @flags: ASYNC_TX_ACK * @depend_tx: memcpy depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index 5b5eb99bb244..b2f133885b7f 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,7 +35,7 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: memset depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fef..3766bc3d7d89 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -223,7 +223,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, if (flags & ASYNC_TX_ACK) async_tx_ack(tx); - if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + if (depend_tx) async_tx_ack(depend_tx); } EXPORT_SYMBOL_GPL(async_tx_submit); @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(async_tx_submit); /** * async_trigger_callback - schedules the 
callback function to be run after * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: 'callback' requires the completion of this transaction * @cb_fn: function to call after depend_tx completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index e0580b0ea533..3cc5dc763b54 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -105,7 +105,6 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, _cb_param); depend_tx = tx; - flags |= ASYNC_TX_DEP_ACK; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -168,8 +167,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, - * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine @@ -230,7 +228,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8d2d35ed298..0ef5362c8d02 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -525,14 +525,12 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + b_offset, clen, 0, + tx, NULL, NULL); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + page_offset, clen, 0, + tx, NULL, NULL); } if (clen < len) /* hit end of page */ break; @@ -615,8 +613,7 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_biofill, sh); + async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); } static void ops_complete_compute5(void *stripe_head_ref) @@ -701,8 +698,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) } tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh); return tx; } @@ -809,7 +806,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * for the synchronous xor case */ - flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + flags = ASYNC_TX_ACK | (prexor ? 
ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); @@ -858,7 +855,7 @@ static void ops_run_check(struct stripe_head *sh) &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, + tx = async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_check, sh); } @@ -2687,8 +2684,8 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, /* place all the copies on one channel */ tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + sh->dev[i].page, 0, 0, STRIPE_SIZE, + 0, tx, NULL, NULL); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 513150d8c25b..9f14cd540cd2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,13 +58,11 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), }; #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From a08abd8ca890a377521d65d493d174bebcaf694b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 11:43:59 -0700 Subject: async_tx: structify submission arguments, add scribble Prepare the api for the arrival of a new parameter, 'scribble'. This will allow callers to identify scratchpad memory for dma address or page address conversions. As this adds yet another parameter, take this opportunity to convert the common submission parameters (flags, dependency, callback, and callback argument) into an object that is passed by reference. Also, take this opportunity to fix up the kerneldoc and add notes about the relevant ASYNC_TX_* flags for each routine. [ Impact: moves api pass-by-value parameters to a pass-by-reference struct ] Signed-off-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 6 +- crypto/async_tx/async_memcpy.c | 26 +++---- crypto/async_tx/async_memset.c | 25 +++---- crypto/async_tx/async_tx.c | 51 +++++++------- crypto/async_tx/async_xor.c | 123 +++++++++++++++++----------------- drivers/md/raid5.c | 59 +++++++++------- include/linux/async_tx.h | 84 ++++++++++++++--------- 7 files changed, 200 insertions(+), 174 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 76feda8541dc..dfe0475f7919 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -54,11 +54,7 @@ features surfaced as a result: 3.1 General format of the API: struct dma_async_tx_descriptor * -async_<operation>(<op specific parameters>, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *dependency, - dma_async_tx_callback callback_routine, - void *callback_parameter); +async_<operation>(<op specific parameters>, struct async_submit_ctl *submit) 3.2 Supported operations: memcpy - memory copy between a source and a destination buffer diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 7117ec6f1b74..89e05556f3df 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -33,28 +33,28 @@ * async_memcpy - attempt to copy memory with a dma engine. 
* @dest: destination page * @src: src page - * @offset: offset in pages to start transaction + * @dest_offset: offset into 'dest' to start transaction + * @src_offset: offset into 'src' to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memcpy depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); @@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset; @@ -83,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(src_buf, KM_USER1); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index b2f133885b7f..c14437238f4c 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,26 +35,23 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memset depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_memset(struct page *dest, int val, unsigned int offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, &dest, 1, NULL, 0, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? 
DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); @@ -64,19 +61,19 @@ async_memset(struct page *dest, int val, unsigned int offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { /* run the memset synchronously */ void *dest_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); - dest_buf = (void *) (((char *) page_address(dest)) + offset); + dest_buf = page_address(dest) + offset; /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); memset(dest_buf, val, len); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 3766bc3d7d89..802a5ce437d9 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -45,13 +45,15 @@ static void __exit async_tx_exit(void) /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously - * @depend_tx: transaction dependency + * @submit: transaction dependency and submission modifiers * @tx_type: transaction type */ struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type) +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type) { + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + /* see if we can keep the chain on one channel */ if (depend_tx && dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) @@ -144,13 +146,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, /** - * submit_disposition - while holding depend_tx->lock we must avoid submitting - * new operations to prevent a circular locking dependency with - * drivers that already hold a channel lock when calling - * async_tx_run_dependencies. + * submit_disposition - flags for routing an incoming operation * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly + * + * while holding depend_tx->lock we must avoid submitting new operations + * to prevent a circular locking dependency with drivers that already + * hold a channel lock when calling async_tx_run_dependencies. 
*/ enum submit_disposition { ASYNC_TX_SUBMITTED, @@ -160,11 +163,12 @@ enum submit_disposition { void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { - tx->callback = cb_fn; - tx->callback_param = cb_param; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + + tx->callback = submit->cb_fn; + tx->callback_param = submit->cb_param; if (depend_tx) { enum submit_disposition s; @@ -220,7 +224,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, tx->tx_submit(tx); } - if (flags & ASYNC_TX_ACK) + if (submit->flags & ASYNC_TX_ACK) async_tx_ack(tx); if (depend_tx) @@ -229,21 +233,20 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, EXPORT_SYMBOL_GPL(async_tx_submit); /** - * async_trigger_callback - schedules the callback function to be run after - * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK - * @depend_tx: 'callback' requires the completion of this transaction - * @cb_fn: function to call after depend_tx completes - * @cb_param: parameter to pass to the callback routine + * async_trigger_callback - schedules the callback function to be run + * @submit: submission and completion parameters + * + * honored flags: ASYNC_TX_ACK + * + * The callback is run after any dependent operations have completed. */ struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_trigger_callback(struct async_submit_ctl *submit) { struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; if (depend_tx) { chan = depend_tx->chan; @@ -262,14 +265,14 @@ async_trigger_callback(enum async_tx_flags flags, if (tx) { pr_debug("%s: (async)\n", __func__); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { pr_debug("%s: (sync)\n", __func__); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 3cc5dc763b54..691fa98a18c4 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -34,18 +34,16 @@ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; - dma_async_tx_callback _cb_fn; - void *_cb_param; - enum async_tx_flags async_flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *cb_param_orig = submit->cb_param; + enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; int xor_src_cnt; dma_addr_t dma_dest; @@ -63,7 +61,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } while (src_cnt) { - async_flags = flags; + submit->flags = flags_orig; 
dma_flags = 0; xor_src_cnt = min(src_cnt, dma->max_xor); /* if we are submitting additional xors, leave the chain open, @@ -71,15 +69,15 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, * buffer mapped */ if (src_cnt > xor_src_cnt) { - async_flags &= ~ASYNC_TX_ACK; + submit->flags &= ~ASYNC_TX_ACK; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; - _cb_fn = NULL; - _cb_param = NULL; + submit->cb_fn = NULL; + submit->cb_param = NULL; } else { - _cb_fn = cb_fn; - _cb_param = cb_param; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; } - if (_cb_fn) + if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; /* Since we have clobbered the src_list we are committed @@ -90,7 +88,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, xor_src_cnt, len, dma_flags); if (unlikely(!tx)) - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); /* spin wait for the preceeding transactions to complete */ while (unlikely(!tx)) { @@ -101,10 +99,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags); } - async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, - _cb_param); - - depend_tx = tx; + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -123,8 +119,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, static void do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; int xor_src_cnt; @@ -139,7 +134,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, /* set destination address */ dest_buf = page_address(dest) + offset; - if (flags & ASYNC_TX_XOR_ZERO_DST) + if (submit->flags & ASYNC_TX_XOR_ZERO_DST) memset(dest_buf, 0, len); while (src_cnt > 0) { @@ -152,33 +147,35 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, src_off += xor_src_cnt; } - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } /** * async_xor - attempt to xor a set of blocks with a dma engine. - * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST - * flag must be set to not include dest data in the calculation. The - * assumption with dma eninges is that they only use the destination - * buffer as a source when it is explicity specified in the source list. * @dest: destination page - * @src_list: array of source pages (if the dest is also a source it must be - * at index zero). The contents of this array may be overwritten. - * @offset: offset in pages to start transaction + * @src_list: array of source pages + * @offset: common src/dst offset to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST + * + * xor_blocks always uses the dest as a source so the + * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in + * the calculation. 
The assumption with dma eninges is that they only + * use the destination buffer as a source when it is explicity specified + * in the source list. + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); BUG_ON(src_cnt <= 1); @@ -188,7 +185,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - flags, depend_tx, cb_fn, cb_param); + submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); @@ -196,16 +193,15 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, /* in the sync case the dest is an implied source * (assumes the dest is the first source) */ - if (flags & ASYNC_TX_XOR_DROP_DST) { + if (submit->flags & ASYNC_TX_XOR_DROP_DST) { src_cnt--; src_list++; } /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - do_sync_xor(dest, src_list, offset, src_cnt, len, - flags, cb_fn, cb_param); + do_sync_xor(dest, src_list, offset, src_cnt, len, submit); return NULL; } @@ -222,25 +218,25 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) /** * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously - * @src_list: array of source pages. The dest page must be listed as a source - * at index zero. The contents of this array may be overwritten. + * @src_list: array of source pages * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR_VAL, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? 
chan->device : NULL; @@ -250,11 +246,12 @@ async_xor_val(struct page *dest, struct page **src_list, if (device && src_cnt <= device->max_xor) { dma_addr_t *dma_src = (dma_addr_t *) src_list; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); @@ -263,7 +260,7 @@ async_xor_val(struct page *dest, struct page **src_list, len, result, dma_prep_flags); if (unlikely(!tx)) { - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); while (!tx) { dma_async_issue_pending(chan); @@ -273,23 +270,23 @@ async_xor_val(struct page *dest, struct page **src_list, } } - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { - unsigned long xor_flags = flags; + enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); - xor_flags |= ASYNC_TX_XOR_DROP_DST; - xor_flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_XOR_DROP_DST; + submit->flags &= ~ASYNC_TX_ACK; - tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, - depend_tx, NULL, NULL); + tx = async_xor(dest, src_list, offset, src_cnt, len, submit); async_tx_quiesce(&tx); *result = page_is_zero(dest, offset, len) ? 0 : 1; - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); + submit->flags = flags_orig; } return tx; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0ef5362c8d02..e1920f23579f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -499,11 +499,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, struct page *bio_page; int i; int page_offset; + struct async_submit_ctl submit; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; + + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -525,13 +528,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, 0, - tx, NULL, NULL); + b_offset, clen, &submit); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, 0, - tx, NULL, NULL); + page_offset, clen, &submit); } + /* chain the operations */ + submit.depend_tx = tx; + if (clen < len) /* hit end of page */ break; page_offset += len; @@ -590,6 +594,7 @@ static void ops_run_biofill(struct stripe_head *sh) { struct dma_async_tx_descriptor *tx = NULL; raid5_conf_t *conf = sh->raid_conf; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu\n", __func__, @@ -613,7 +618,8 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); + async_trigger_callback(&submit); } static void ops_complete_compute5(void *stripe_head_ref) @@ -645,6 +651,7 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) struct page *xor_dest = tgt->page; int count = 0; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu block: %d\n", @@ -657,13 +664,12 @@ 
static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute5, sh, NULL); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - 0, NULL, ops_complete_compute5, sh); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute5, sh); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -683,6 +689,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) int disks = sh->disks; struct page *xor_srcs[disks]; int count = 0, pd_idx = sh->pd_idx, i; + struct async_submit_ctl submit; /* existing parity data subtracted */ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -697,9 +704,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) xor_srcs[count++] = dev->page; } - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh, NULL); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -772,7 +779,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) /* kernel stack size limits the total number of disks */ int disks = sh->disks; struct page *xor_srcs[disks]; - + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest; int prexor = 0; @@ -811,13 +818,11 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) atomic_inc(&sh->count); - if (unlikely(count == 1)) { - flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); - } else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); + init_async_submit(&submit, flags, tx, ops_complete_postxor, sh, NULL); + if (unlikely(count == 1)) + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); } static void ops_complete_check(void *stripe_head_ref) @@ -838,6 +843,7 @@ static void ops_run_check(struct stripe_head *sh) int disks = sh->disks; struct page *xor_srcs[disks]; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -851,12 +857,13 @@ static void ops_run_check(struct stripe_head *sh) xor_srcs[count++] = dev->page; } + init_async_submit(&submit, 0, NULL, NULL, NULL, NULL); tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); + &sh->ops.zero_sum_result, &submit); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_ACK, tx, - ops_complete_check, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); + tx = async_trigger_callback(&submit); } static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) @@ -2664,6 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, if (i != sh->pd_idx && i != sh->qd_idx) { int dd_idx, j; struct stripe_head *sh2; + struct async_submit_ctl submit; sector_t bn = 
compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, @@ -2683,9 +2691,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, } /* place all the copies on one channel */ + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, sh->dev[i].page, 0, 0, STRIPE_SIZE, - 0, tx, NULL, NULL); + &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 9f14cd540cd2..00cfb637ddf2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -65,6 +65,22 @@ enum async_tx_flags { ASYNC_TX_ACK = (1 << 2), }; +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; +}; + #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL @@ -73,8 +89,8 @@ enum async_tx_flags { #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -83,9 +99,10 @@ static inline void async_tx_issue_pending_all(void) } static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -97,46 +114,53 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) +{ + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); +} + +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) { - if (cb_fn) - cb_fn(cb_fn_param); + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct 
page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); -struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From e74e396204bfcb67570ba4517b08f5918e69afea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 30 Mar 2009 19:07:44 +0900 Subject: percpu: use dynamic percpu allocator as the default percpu allocator This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use dynamic percpu allocator. The first chunk is allocated using embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as long as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing range limit the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change. * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its invert - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted archs. These archs implement their own setup_per_cpu_areas() and the conversion is not trivial. * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tests on x86_32 with debug code (x86_32 doesn't use default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ] Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: David S. 
Miller Acked-by: Benjamin Herrenschmidt Acked-by: Martin Schwidefsky Reviewed-by: Christoph Lameter Cc: Paul Mundt Cc: Russell King Cc: Mikael Starvik Cc: Ralf Baechle Cc: Bryan Wu Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Hirokazu Takata Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Heiko Carstens Cc: Ingo Molnar --- arch/alpha/Kconfig | 3 +++ arch/ia64/Kconfig | 3 +++ arch/powerpc/Kconfig | 3 +++ arch/s390/Kconfig | 3 +++ arch/sparc/Kconfig | 3 +++ arch/x86/Kconfig | 3 --- include/linux/percpu.h | 12 +++++++++--- init/main.c | 24 ------------------------ kernel/module.c | 6 +++--- mm/Makefile | 2 +- mm/allocpercpu.c | 28 ++++++++++++++++++++++++++++ mm/percpu.c | 40 +++++++++++++++++++++++++++++++++++++++- 12 files changed, 95 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 9fb8aae5c391..05d86407188c 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -70,6 +70,9 @@ config AUTO_IRQ_AFFINITY depends on SMP default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..328d2f8b8c3f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bf6cedfa05db..a774c2acbe69 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -46,6 +46,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool PPC64 + config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a14dba0e4d67..f4a3cc62d28f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -75,6 +75,9 @@ config VIRT_CPU_ACCOUNTING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + mainmenu "Linux Kernel Configuration" config S390 diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3f8b6a92eabd..7a8698b913fe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config AUDIT_ARCH bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y if SPARC64 + config HAVE_SETUP_PER_CPU_AREA def_bool y if SPARC64 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef6b4f9..a48a90076d83 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,9 +149,6 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y -config HAVE_DYNAMIC_PER_CPU_AREA - def_bool y - config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 26fd9d12f050..e5000343dd61 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -34,7 +34,7 @@ #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) @@ -80,7 +80,7 @@ extern ssize_t __init pcpu_embed_first_chunk( extern void *__alloc_reserved_percpu(size_t size, size_t align); -#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ struct percpu_data { void *ptrs[1]; @@ -99,11 +99,15 @@ struct percpu_data { (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ extern void 
*__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +extern void __init setup_per_cpu_areas(void); +#endif + #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) @@ -124,6 +128,8 @@ static inline void free_percpu(void *p) kfree(p); } +static inline void __init setup_per_cpu_areas(void) { } + #endif /* CONFIG_SMP */ #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ diff --git a/init/main.c b/init/main.c index 09131ec090c1..602d724afa5c 100644 --- a/init/main.c +++ b/init/main.c @@ -357,7 +357,6 @@ static void __init smp_init(void) #define smp_init() do { } while (0) #endif -static inline void setup_per_cpu_areas(void) { } static inline void setup_nr_cpu_ids(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } @@ -378,29 +377,6 @@ static void __init setup_nr_cpu_ids(void) nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; } -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; - -EXPORT_SYMBOL(__per_cpu_offset); - -static void __init setup_per_cpu_areas(void) -{ - unsigned long size, i; - char *ptr; - unsigned long nr_possible_cpus = num_possible_cpus(); - - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); - ptr = alloc_bootmem_pages(size * nr_possible_cpus); - - for_each_possible_cpu(i) { - __per_cpu_offset[i] = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - ptr += size; - } -} -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ - /* Called by boot processor to activate the rest. */ static void __init smp_init(void) { diff --git a/kernel/module.c b/kernel/module.c index 38928fcaff2b..f5934954fa99 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA static void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) @@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme) free_percpu(freeme); } -#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ /* Number of blocks used and allocated. */ static unsigned int pcpu_num_used, pcpu_num_allocated; @@ -535,7 +535,7 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, diff --git a/mm/Makefile b/mm/Makefile index 5e0bd6426693..c77c6487552f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA obj-$(CONFIG_SMP) += percpu.o else obj-$(CONFIG_SMP) += allocpercpu.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index dfdee6a47359..df34ceae0c67 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -5,6 +5,8 @@ */ #include #include +#include +#include #ifndef cache_line_size #define cache_line_size() L1_CACHE_BYTES @@ -147,3 +149,29 @@ void free_percpu(void *__pdata) kfree(__percpu_disguise(__pdata)); } EXPORT_SYMBOL_GPL(free_percpu); + +/* + * Generic percpu area setup. 
+ */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; + +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + unsigned long size, i; + char *ptr; + unsigned long nr_possible_cpus = num_possible_cpus(); + + /* Copy section for each CPU (we discard the original) */ + size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); + ptr = alloc_bootmem_pages(size * nr_possible_cpus); + + for_each_possible_cpu(i) { + __per_cpu_offset[i] = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + ptr += size; + } +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ diff --git a/mm/percpu.c b/mm/percpu.c index b70f2acd8853..b14984566f5a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -43,7 +43,7 @@ * * To use this allocator, arch code should do the followings. * - * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA * * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate * regular address to percpu pointer and back if they need to be @@ -1275,3 +1275,41 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, reserved_size, dyn_size, pcpue_unit_size, pcpue_ptr, NULL); } + +/* + * Generic percpu area setup. + * + * The embedding helper is used because its behavior closely resembles + * the original non-dynamic generic percpu area setup. This is + * important because many archs have addressing restrictions and might + * fail if the percpu area is located far away from the previous + * location. As an added bonus, in non-NUMA cases, embedding is + * generally a good idea TLB-wise because percpu area can piggy back + * on the physical linear memory mapping which uses large page + * mappings on applicable archs. + */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + size_t static_size = __per_cpu_end - __per_cpu_start; + ssize_t unit_size; + unsigned long delta; + unsigned int cpu; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, -1); + if (unit_size < 0) + panic("Failed to initialized percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + cpu * unit_size; +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ -- cgit v1.2.3 From 405d967dc70002991f8fc35c20e0d3cbc7614f63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:38 +0900 Subject: linker script: throw away .discard section x86 throws away .discard section but no other archs do. Also, .discard is not thrown away while linking modules. Make every arch and module linking throw it away. This will be used to define dummy variables for percpu declarations and definitions. This patch is based on Ivan Kokshaysky's alpha percpu patch. [ Impact: always throw away everything in .discard ] Signed-off-by: Tejun Heo Cc: Ivan Kokshaysky Cc: Richard Henderson Cc: Russell King Cc: Haavard Skinnemoen Cc: Bryan Wu Cc: Mikael Starvik Cc: Jesper Nilsson Cc: David Howells Cc: Yoshinori Sato Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Ralf Baechle Cc: Kyle McMartin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: David S. 
Miller Cc: Jeff Dike Cc: Chris Zankel Cc: Rusty Russell Cc: Ingo Molnar --- Makefile | 2 +- arch/alpha/kernel/vmlinux.lds.S | 1 + arch/arm/kernel/vmlinux.lds.S | 1 + arch/avr32/kernel/vmlinux.lds.S | 1 + arch/blackfin/kernel/vmlinux.lds.S | 1 + arch/cris/kernel/vmlinux.lds.S | 1 + arch/frv/kernel/vmlinux.lds.S | 2 ++ arch/h8300/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 1 + arch/m32r/kernel/vmlinux.lds.S | 1 + arch/m68k/kernel/vmlinux-std.lds | 1 + arch/m68k/kernel/vmlinux-sun3.lds | 1 + arch/m68knommu/kernel/vmlinux.lds.S | 1 + arch/microblaze/kernel/vmlinux.lds.S | 2 ++ arch/mips/kernel/vmlinux.lds.S | 1 + arch/mn10300/kernel/vmlinux.lds.S | 1 + arch/parisc/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + arch/s390/kernel/vmlinux.lds.S | 1 + arch/sh/kernel/vmlinux.lds.S | 1 + arch/sparc/kernel/vmlinux.lds.S | 1 + arch/um/kernel/dyn.lds.S | 2 ++ arch/um/kernel/uml.lds.S | 2 ++ arch/xtensa/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 8 ++++++++ scripts/module-common.lds | 8 ++++++++ 26 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 scripts/module-common.lds (limited to 'include') diff --git a/Makefile b/Makefile index 46e1c9d03d51..12245be05122 100644 --- a/Makefile +++ b/Makefile @@ -327,7 +327,7 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ MODFLAGS = -DMODULE CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) -LDFLAGS_MODULE = +LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds CFLAGS_KERNEL = AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index b9d6568e5f7f..75fe1d6877e9 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -139,6 +139,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .mdebug 0 : { diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 6c0779792546..e256c57b8981 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -82,6 +82,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) *(.ARM.exidx.exit.text) *(.ARM.extab.exit.text) #ifndef CONFIG_MMU diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 7910d41eb886..b8324608ec0c 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -131,6 +131,7 @@ SECTIONS /DISCARD/ : { EXIT_DATA *(.exitcall.exit) + *(.discard) } DWARF_DEBUG diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 6ac307ca0d80..6e8eabd8f0a6 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -280,5 +280,6 @@ SECTIONS /DISCARD/ : { *(.exitcall.exit) + *(.discard) } } diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 0d2adfc794d4..a3175ebb38cc 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -145,6 +145,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 22d9787406ed..64b5a5e4d35e 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -177,6 +177,8 @@ SECTIONS .debug_ranges 0 : { *(.debug_ranges) } .comment 0 : { *(.comment) } + + /DISCARD/ : { *(.discard) } } __kernel_image_size_no_bss = __bss_start - __kernel_image_start; diff --git 
a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 43a87b9085b6..03d6c0df33db 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -154,6 +154,7 @@ SECTIONS } /DISCARD/ : { *(.exitcall.exit) + *(.discard) } .romfs : { diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 4a95e86b9ac2..13d958975874 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -29,6 +29,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) } diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 4179adf6c624..480a49944cfd 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -125,6 +125,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Stabs debugging sections. */ diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 01d212bb05a6..905a797ada93 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -87,6 +87,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Stabs debugging sections. */ diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index c192f773db96..47d04be322aa 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -82,6 +82,7 @@ __init_begin = .; EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .crap : { diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index b7fe505e358d..68111a61a77f 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -188,6 +188,7 @@ SECTIONS { EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .bss : { diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index d34d38dcd12c..a207543c5927 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -162,4 +162,6 @@ SECTIONS { } . = ALIGN(4096); _end = .; + + /DISCARD/ : { *(.discard) } } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 58738c8d754f..45901609b741 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -179,6 +179,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) /* ABI crap starts here */ *(.MIPS.options) diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 24de6b90f401..5d9f2f96ad92 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -146,6 +146,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index fd2cc4fd2b65..ccf58341845a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -240,6 +240,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8ef8a14abc95..7fca9355fd3d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS /* Sections to be discarded. 
*/ /DISCARD/ : { *(.exitcall.exit) + *(.discard) EXIT_DATA } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a53db23ee092..98867dfea469 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -161,6 +161,7 @@ SECTIONS /DISCARD/ : { EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Debugging sections. */ diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index f53c76acaede..766976d27b21 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -171,6 +171,7 @@ SECTIONS */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index fcbbd000ec08..d63cf914667d 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -175,6 +175,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 9975e1ab44fb..2916d6eadffd 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -156,4 +156,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + /DISCARD/ : { *(.discard) } } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 11b835248b86..1f8a622cabe1 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -100,4 +100,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + /DISCARD/ : { *(.discard) } } diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 41c159cd872f..b1e24638acd7 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -287,6 +287,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .xt.lit : { *(.xt.lit) } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 55413e568f07..a19120c4e109 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -628,6 +628,14 @@ #define INITRAMFS #endif +#define DISCARDS \ + /DISCARD/ : { \ + EXIT_TEXT \ + EXIT_DATA \ + *(.exitcall.exit) \ + *(.discard) \ + } + /** * PERCPU_VADDR - define output section for percpu area * @vaddr: explicit base address (optional) diff --git a/scripts/module-common.lds b/scripts/module-common.lds new file mode 100644 index 000000000000..47a1f9ae0ede --- /dev/null +++ b/scripts/module-common.lds @@ -0,0 +1,8 @@ +/* + * Common module linker script, always used when linking a module. + * Archs are free to supply their own linker scripts. ld will + * combine them automatically. + */ +SECTIONS { + /DISCARD/ : { *(.discard) } +} -- cgit v1.2.3 From 7c756e6e19e71f0327760d8955f7077118ebb2b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:50 +0900 Subject: percpu: implement optional weak percpu definitions Some archs (alpha and s390) need to use weak definitions for percpu variables in modules so that the compiler generates external references for them. This patch implements weak percpu definitions which arch can enable by defining ARCH_NEEDS_WEAK_PER_CPU in arch percpu header file. This weak definition adds the following two restrictions on percpu variable definitions. 1. percpu symbols must be unique whether static or not 2. percpu variables can't be defined inside a function To ensure that these restrictions are observed in generic code, config option DEBUG_FORCE_WEAK_PER_CPU enables weak percpu definitions for all cases. This patch is inspired by Ivan Kokshaysky's alpha percpu patch. 
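As a sketch of what these two restrictions forbid (hypothetical code, not part of the patch; both snippets are accepted with normal percpu definitions but must fail to build once weak definitions are in effect):

/* 1. Duplicate percpu symbol names, even static ones. With __weak,
 *    both definitions would silently collapse into one symbol and the
 *    two files would share storage; the __pcpu_unique_* dummy below
 *    turns this into a build failure instead. */
/* foo.c */ static DEFINE_PER_CPU(int, hit_count);
/* bar.c */ static DEFINE_PER_CPU(int, hit_count);	/* collides */

/* 2. A percpu variable defined inside a function. The file-scope
 *    __pcpu_scope_* dummy emitted by DEFINE_PER_CPU() cannot be
 *    declared at function scope, so this no longer compiles. */
void record_hit(void)
{
	static DEFINE_PER_CPU(int, local_hits);		/* rejected */
}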
[ Impact: stricter rules for percpu variables, one more debug config option ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Howells Cc: Ivan Kokshaysky --- include/linux/percpu-defs.h | 65 ++++++++++++++++++++++++++++++++++++++------- lib/Kconfig.debug | 15 +++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f921d74f49f..cf32838ad0fa 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -10,21 +10,68 @@ /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the - * 'section' argument. This may be used to affect the parameters governing the + * 'sec' argument. This may be used to affect the parameters governing the * variable's storage. * * NOTE! The sections for the DECLARE and for the DEFINE must match, lest * linkage errors occur due the compiler generating the wrong code to access * that section. */ -#define DECLARE_PER_CPU_SECTION(type, name, section) \ - extern \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SECTION(type, name, section) \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define __PCPU_ATTRS(sec) \ + __attribute__((section(PER_CPU_BASE_SECTION sec))) \ + PER_CPU_ATTRIBUTES + +#define __PCPU_DUMMY_ATTRS \ + __attribute__((section(".discard"), unused)) + +/* + * s390 and alpha modules require percpu variables to be defined as + * weak to force the compiler to generate GOT based external + * references for them. This is necessary because percpu sections + * will be located outside of the usually addressable area. + * + * This definition puts the following two extra restrictions when + * defining percpu variables. + * + * 1. The symbol must be globally unique, even the static ones. + * 2. Static percpu variables cannot be defined inside a function. + * + * Archs which need weak percpu definitions should define + * ARCH_NEEDS_WEAK_PER_CPU in asm/percpu.h when necessary. + * + * To ensure that the generic code observes the above two + * restrictions, if CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set weak + * definition is used for all cases. + */ +#if defined(ARCH_NEEDS_WEAK_PER_CPU) || defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU) +/* + * __pcpu_scope_* dummy variable is used to enforce scope. It + * receives the static modifier when it's used in front of + * DEFINE_PER_CPU() and will trigger build failure if + * DECLARE_PER_CPU() is used for the same variable. + * + * __pcpu_unique_* dummy variable is used to enforce symbol uniqueness + * such that hidden weak symbol collision, which will cause unrelated + * variables to share the same address, can be detected during build. + */ +#define DECLARE_PER_CPU_SECTION(type, name, sec) \ + extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + extern __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, sec) \ + __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ + __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name +#else +/* + * Normal declaration and definition macros. 
+ */ +#define DECLARE_PER_CPU_SECTION(type, name, sec) \ + extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, sec) \ + __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name +#endif /* * Variant on the per-CPU variable declaration/definition theme used for diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 23067ab1a73c..77e0d8b1b7c5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -777,6 +777,21 @@ config DEBUG_BLOCK_EXT_DEVT Say N if you are unsure. +config DEBUG_FORCE_WEAK_PER_CPU + bool "Force weak per-cpu definitions" + depends on DEBUG_KERNEL + help + s390 and alpha require percpu variables in modules to be + defined weak to work around addressing range issue which + puts the following two restrictions on percpu variable + definitions. + + 1. percpu symbols must be unique whether static or not + 2. percpu variables can't be defined inside a function + + To ensure that generic code follows the above rules, this + option forces all percpu variables to be defined as weak. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL -- cgit v1.2.3 From dcf52fb71d988ba945054308f661bddf9b2455fb Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 22 Jun 2009 20:41:45 +0000 Subject: ACPI: remove unused acpi_device_ops .stop method No drivers use the .stop method, so remove it. Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/scan.c | 5 ----- include/acpi/acpi_bus.h | 2 -- 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 781435d7e369..4a89f081160f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -426,9 +426,6 @@ static int acpi_device_probe(struct device * dev) if (acpi_drv->ops.notify) { ret = acpi_device_install_notify_handler(acpi_dev); if (ret) { - if (acpi_drv->ops.stop) - acpi_drv->ops.stop(acpi_dev, - acpi_dev->removal_type); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type); @@ -452,8 +449,6 @@ static int acpi_device_remove(struct device * dev) if (acpi_drv) { if (acpi_drv->ops.notify) acpi_device_remove_notify_handler(acpi_dev); - if (acpi_drv->ops.stop) - acpi_drv->ops.stop(acpi_dev, acpi_dev->removal_type); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type); } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..79a6c5ebe908 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -89,7 +89,6 @@ struct acpi_device; typedef int (*acpi_op_add) (struct acpi_device * device); typedef int (*acpi_op_remove) (struct acpi_device * device, int type); typedef int (*acpi_op_start) (struct acpi_device * device); -typedef int (*acpi_op_stop) (struct acpi_device * device, int type); typedef int (*acpi_op_suspend) (struct acpi_device * device, pm_message_t state); typedef int (*acpi_op_resume) (struct acpi_device * device); @@ -106,7 +105,6 @@ struct acpi_device_ops { acpi_op_add add; acpi_op_remove remove; acpi_op_start start; - acpi_op_stop stop; acpi_op_suspend suspend; acpi_op_resume resume; acpi_op_bind bind; -- cgit v1.2.3 From 1a8dd307cc0a2119be4e578c517795464e6dabba Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 29 Jun 2009 17:45:39 +0900 Subject: percpu: use __weak only in the definition of weak percpu variables __weak is necessary only for definition and might even not work in declaration. Drop it from declaration. This change was suggested by Ivan Kokshaysky. 
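As a simplified sketch, with ARCH_NEEDS_WEAK_PER_CPU set the pair now effectively expands as follows (the dummy-variable attributes are omitted and the percpu section name is written out purely for illustration):

/* header: the declaration is a plain extern, no __weak */
extern char __pcpu_scope_cnt;
extern __attribute__((section(".data.percpu"))) __typeof__(int) per_cpu__cnt;

/* one source file: only the definition carries __weak */
char __pcpu_scope_cnt;
char __pcpu_unique_cnt;
__attribute__((section(".data.percpu"))) __weak __typeof__(int) per_cpu__cnt;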
Signed-off-by: Tejun Heo Acked-by: Ivan Kokshaysky --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cf32838ad0fa..9b7a53cc16eb 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -56,7 +56,7 @@ */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ - extern __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ -- cgit v1.2.3 From b294a290d24d1196d68399cc3a9b8c50bfb55abd Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Tue, 30 Jun 2009 02:13:01 -0400 Subject: Revert "power: remove POWER_SUPPLY_PROP_CAPACITY_LEVEL" This reverts commit 8efe444038a205e79b38b7ad03878824901849a8 and 4cbc76eadf56399cd11fb736b33c53aec9caab8c. Richard@laptop.org was apparently using CAPACITY_LEVEL for debugging battery/EC problems, and was upset that it was removed. This readds it. Conflicts: Documentation/power_supply_class.txt Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 2 ++ drivers/power/olpc_battery.c | 9 +++++++++ drivers/power/power_supply_sysfs.c | 6 ++++++ include/linux/power_supply.h | 10 ++++++++++ 4 files changed, 27 insertions(+) (limited to 'include') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index c6cd4956047c..709d95571d7b 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -108,6 +108,8 @@ relative, time-based measurements. ENERGY_FULL, ENERGY_EMPTY - same as above but for energy. CAPACITY - capacity in percents. +CAPACITY_LEVEL - capacity level. This corresponds to +POWER_SUPPLY_CAPACITY_LEVEL_*. TEMP - temperature of the power supply. TEMP_AMBIENT - ambient temperature. 
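For context on how the restored attribute is consumed, a minimal userspace sketch (the supply name "olpc-battery" is an assumption; the sysfs path varies per platform and battery):

/* print the capacity_level string exposed via sysfs, one of the
 * values in capacity_level_text[], e.g. "Low" */
#include <stdio.h>

int main(void)
{
	char level[32];
	FILE *f = fopen("/sys/class/power_supply/olpc-battery/capacity_level", "r");

	if (!f)
		return 1;
	if (fgets(level, sizeof(level), f))
		printf("capacity level: %s", level);
	fclose(f);
	return 0;
}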
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 58e419299cd6..3a589df09376 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -276,6 +276,14 @@ static int olpc_bat_get_property(struct power_supply *psy, return ret; val->intval = ec_byte; break; + case POWER_SUPPLY_PROP_CAPACITY_LEVEL: + if (ec_byte & BAT_STAT_FULL) + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_FULL; + else if (ec_byte & BAT_STAT_LOW) + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_LOW; + else + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; + break; case POWER_SUPPLY_PROP_TEMP: ret = olpc_ec_cmd(EC_BAT_TEMP, NULL, 0, (void *)&ec_word, 2); if (ret) @@ -321,6 +329,7 @@ static enum power_supply_property olpc_bat_props[] = { POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_CURRENT_AVG, POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_MANUFACTURER, diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index da73591017f9..9deabbde6fd6 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -51,6 +51,9 @@ static ssize_t power_supply_show_property(struct device *dev, "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd", "LiMn" }; + static char *capacity_level_text[] = { + "Unknown", "Critical", "Low", "Normal", "High", "Full" + }; ssize_t ret; struct power_supply *psy = dev_get_drvdata(dev); const ptrdiff_t off = attr - power_supply_attrs; @@ -71,6 +74,8 @@ static ssize_t power_supply_show_property(struct device *dev, return sprintf(buf, "%s\n", health_text[value.intval]); else if (off == POWER_SUPPLY_PROP_TECHNOLOGY) return sprintf(buf, "%s\n", technology_text[value.intval]); + else if (off == POWER_SUPPLY_PROP_CAPACITY_LEVEL) + return sprintf(buf, "%s\n", capacity_level_text[value.intval]); else if (off >= POWER_SUPPLY_PROP_MODEL_NAME) return sprintf(buf, "%s\n", value.strval); @@ -109,6 +114,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(energy_now), POWER_SUPPLY_ATTR(energy_avg), POWER_SUPPLY_ATTR(capacity), + POWER_SUPPLY_ATTR(capacity_level), POWER_SUPPLY_ATTR(temp), POWER_SUPPLY_ATTR(temp_ambient), POWER_SUPPLY_ATTR(time_to_empty_now), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 594c494ac3f0..0ab6aa171241 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -58,6 +58,15 @@ enum { POWER_SUPPLY_TECHNOLOGY_LiMn, }; +enum { + POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN = 0, + POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL, + POWER_SUPPLY_CAPACITY_LEVEL_LOW, + POWER_SUPPLY_CAPACITY_LEVEL_NORMAL, + POWER_SUPPLY_CAPACITY_LEVEL_HIGH, + POWER_SUPPLY_CAPACITY_LEVEL_FULL, +}; + enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, @@ -89,6 +98,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_ENERGY_NOW, POWER_SUPPLY_PROP_ENERGY_AVG, POWER_SUPPLY_PROP_CAPACITY, /* in percents! */ + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, -- cgit v1.2.3 From ee8076ed3e1cdd0cd1e61318386932669c90b92f Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Thu, 2 Jul 2009 09:45:18 -0400 Subject: power_supply: Add a charge_type property, and use it for olpc driver This adds a new sysfs file called 'charge_type' which displays the type of charging (unknown, n/a, trickle charge, or fast charging). 
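As a sketch, a hypothetical driver reports the new property from its get_property() callback roughly as follows (the MYCHIP_* status flags are assumptions for illustration; the olpc_battery hunk below is the real example):

/* map controller status bits onto the new charge_type property */
case POWER_SUPPLY_PROP_CHARGE_TYPE:
	if (status & MYCHIP_STAT_PRECHARGE)
		val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
	else if (status & MYCHIP_STAT_CHARGING)
		val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST;
	else
		val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE;
	break;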
This allows things like battery diagnostics to determine what the battery/EC is doing without resorting to changing the 'status' sysfs output. Signed-off-by: Andres Salomon Acked-by: Mark Brown Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 5 +++++ drivers/power/olpc_battery.c | 9 +++++++++ drivers/power/power_supply_sysfs.c | 6 ++++++ include/linux/power_supply.h | 8 ++++++++ 4 files changed, 28 insertions(+) (limited to 'include') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 709d95571d7b..9f16c5178b66 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -76,6 +76,11 @@ STATUS - this attribute represents operating status (charging, full, discharging (i.e. powering a load), etc.). This corresponds to BATTERY_STATUS_* values, as defined in battery.h. +CHARGE_TYPE - batteries can typically charge at different rates. +This defines trickle and fast charges. For batteries that +are already charged or discharging, 'n/a' can be displayed (or +'unknown', if the status is not known). + HEALTH - represents health of the battery, values corresponds to POWER_SUPPLY_HEALTH_*, defined in battery.h. diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 602bbd008f78..8fefe5a73558 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -233,6 +233,14 @@ static int olpc_bat_get_property(struct power_supply *psy, if (ret) return ret; break; + case POWER_SUPPLY_PROP_CHARGE_TYPE: + if (ec_byte & BAT_STAT_TRICKLE) + val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; + else if (ec_byte & BAT_STAT_CHARGING) + val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST; + else + val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE; + break; case POWER_SUPPLY_PROP_PRESENT: val->intval = !!(ec_byte & (BAT_STAT_PRESENT | BAT_STAT_TRICKLE)); @@ -325,6 +333,7 @@ static int olpc_bat_get_property(struct power_supply *psy, static enum power_supply_property olpc_bat_props[] = { POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_TECHNOLOGY, diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 9deabbde6fd6..08144393d64b 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -43,6 +43,9 @@ static ssize_t power_supply_show_property(struct device *dev, static char *status_text[] = { "Unknown", "Charging", "Discharging", "Not charging", "Full" }; + static char *charge_type[] = { + "Unknown", "N/A", "Trickle", "Fast" + }; static char *health_text[] = { "Unknown", "Good", "Overheat", "Dead", "Over voltage", "Unspecified failure", "Cold", @@ -70,6 +73,8 @@ static ssize_t power_supply_show_property(struct device *dev, if (off == POWER_SUPPLY_PROP_STATUS) return sprintf(buf, "%s\n", status_text[value.intval]); + else if (off == POWER_SUPPLY_PROP_CHARGE_TYPE) + return sprintf(buf, "%s\n", charge_type[value.intval]); else if (off == POWER_SUPPLY_PROP_HEALTH) return sprintf(buf, "%s\n", health_text[value.intval]); else if (off == POWER_SUPPLY_PROP_TECHNOLOGY) @@ -86,6 +91,7 @@ static ssize_t power_supply_show_property(struct device *dev, static struct device_attribute power_supply_attrs[] = { /* Properties of type `int' */ POWER_SUPPLY_ATTR(status), + POWER_SUPPLY_ATTR(charge_type), POWER_SUPPLY_ATTR(health), POWER_SUPPLY_ATTR(present), POWER_SUPPLY_ATTR(online), diff --git a/include/linux/power_supply.h 
b/include/linux/power_supply.h index 0ab6aa171241..4c7c6fc35487 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -38,6 +38,13 @@ enum { POWER_SUPPLY_STATUS_FULL, }; +enum { + POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0, + POWER_SUPPLY_CHARGE_TYPE_NONE, + POWER_SUPPLY_CHARGE_TYPE_TRICKLE, + POWER_SUPPLY_CHARGE_TYPE_FAST, +}; + enum { POWER_SUPPLY_HEALTH_UNKNOWN = 0, POWER_SUPPLY_HEALTH_GOOD, @@ -70,6 +77,7 @@ enum { enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, -- cgit v1.2.3 From 788e5abc5441e9046dd91c995c6f1f75bbd144bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:58 +0900 Subject: percpu: drop @unit_size from embed first chunk allocator The only extra feature @unit_size provides is making dead space at the end of the first chunk which doesn't have any valid usecase. Drop the parameter. This will increase consistency with generalized 4k allocator. James Bottomley spotted missing conversion for the default setup_per_cpu_areas() which caused build breakage on all arcsh which use it. [ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: James Bottomley Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 2 +- include/linux/percpu.h | 2 +- mm/percpu.c | 18 ++++++------------ 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef7cf4a..14728206fb52 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -342,7 +342,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) return -EINVAL; return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); + reserve - PERCPU_FIRST_CHUNK_RESERVE); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e5000343dd61..83bff053bd1c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -69,7 +69,7 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size); + ssize_t dyn_size); /* * Use this to get to a cpu's version of the per-cpu object diff --git a/mm/percpu.c b/mm/percpu.c index 19dd83b5cbdc..fc6babe6e554 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1207,7 +1207,6 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected. @@ -1219,9 +1218,9 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * page size. * * When @dyn_size is positive, dynamic area might be larger than - * specified to fill page alignment. Also, when @dyn_size is auto, - * @dyn_size does not fill the whole first chunk but only what's - * necessary for page alignment after static and reserved areas. + * specified to fill page alignment. When @dyn_size is auto, + * @dyn_size is just big enough to fill page alignment after static + * and reserved areas. 
* * If the needed size is smaller than the minimum or specified unit * size, the leftover is returned to the bootmem allocator. @@ -1231,7 +1230,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * percpu access on success, -errno on failure. */ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size) + ssize_t dyn_size) { size_t chunk_size; unsigned int cpu; @@ -1242,12 +1241,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, if (dyn_size != 0) dyn_size = pcpue_size - static_size - reserved_size; - if (unit_size >= 0) { - BUG_ON(unit_size < pcpue_size); - pcpue_unit_size = unit_size; - } else - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); chunk_size = pcpue_unit_size * num_possible_cpus(); pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, @@ -1304,7 +1298,7 @@ void __init setup_per_cpu_areas(void) * what the legacy allocator did. */ unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE, -1); + PERCPU_DYNAMIC_RESERVE); if (unit_size < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From d4b95f80399471e4bce5e992700ff7f06ef91f6a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize 4k first chunk allocator Generalize and move x86 setup_pcpu_4k() into pcpu_4k_first_chunk(). setup_pcpu_4k() now is a simple wrapper around the generalized version. Other than taking size parameters and using arch supplied callbacks to allocate/free memory, pcpu_4k_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, s/pcpu_populate_pte_fn_t/pcpu_fc_populate_pte_fn_t/ for consistency. [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 78 ++++++++++---------------------------- include/linux/percpu.h | 12 +++++- mm/percpu.c | 85 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 113 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 14728206fb52..ab896b31e80b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -123,6 +123,19 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } +/* + * Helpers for first chunk memory allocation + */ +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +{ + return pcpu_alloc_bootmem(cpu, size, size); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + /* * Large page remap allocator * @@ -346,22 +359,11 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k page allocator + * 4k allocator * - * This is the basic allocator. Static percpu area is allocated - * page-by-page and most of initialization is done by the generic - * setup function. + * Boring fallback 4k allocator. This allocator puts more pressure on + * PTE TLBs but other than that behaves nicely on both UMA and NUMA. 
*/ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_nr_static_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_nr_static_pages) - return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; - return NULL; -} - static void __init pcpu4k_populate_pte(unsigned long addr) { populate_extra_pte(addr); @@ -369,51 +371,9 @@ static void __init pcpu4k_populate_pte(unsigned long addr) static ssize_t __init setup_pcpu_4k(size_t static_size) { - size_t pages_size; - unsigned int cpu; - int i, j; - ssize_t ret; - - pcpu4k_nr_static_pages = PFN_UP(static_size); - - /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() - * sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); - - /* allocate and copy */ - j = 0; - for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_nr_static_pages; i++) { - void *ptr; - - ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); - goto enomem; - } - - memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); - pcpu4k_pages[j++] = virt_to_page(ptr); - } - - /* we're ready, commit */ - pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", - pcpu4k_nr_static_pages, static_size); - - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, - -1, NULL, pcpu4k_populate_pte); - goto out_free_ar; - -enomem: - while (--j >= 0) - free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); - return ret; + return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpu4k_populate_pte); } /* for explicit first chunk allocator selection */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 83bff053bd1c..41b5bfab4195 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,18 +59,26 @@ extern void *pcpu_base_addr; typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); -typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); +typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); +typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, ssize_t dyn_size, ssize_t unit_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + pcpu_fc_populate_pte_fn_t populate_pte_fn); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); +extern ssize_t __init pcpu_4k_first_chunk( + size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index fc6babe6e554..27b0f40a3ea8 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1037,7 +1037,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, ssize_t dyn_size, ssize_t unit_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn) + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; static int smap[2], dmap[2]; @@ -1270,6 +1270,89 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, pcpue_unit_size, pcpue_ptr, NULL); } +/* + * 4k page first chunk setup helper. + */ +static struct page **pcpu4k_pages __initdata; +static int pcpu4k_nr_static_pages __initdata; + +static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) +{ + if (pageno < pcpu4k_nr_static_pages) + return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; + return NULL; +} + +/** + * pcpu_4k_first_chunk - map the first chunk using PAGE_SIZE pages + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE + * @free_fn: funtion to free percpu page, always called with PAGE_SIZE + * @populate_pte_fn: function to populate pte + * + * This is a helper to ease setting up embedded first percpu chunk and + * can be called where pcpu_setup_first_chunk() is expected. + * + * This is the basic allocator. Static percpu area is allocated + * page-by-page into vmalloc area. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) +{ + size_t pages_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + pcpu4k_nr_static_pages = PFN_UP(static_size); + + /* unaligned allocations can't be freed, round up to page size */ + pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() * + sizeof(pcpu4k_pages[0])); + pcpu4k_pages = alloc_bootmem(pages_size); + + /* allocate and copy */ + j = 0; + for_each_possible_cpu(cpu) + for (i = 0; i < pcpu4k_nr_static_pages; i++) { + void *ptr; + + ptr = alloc_fn(cpu, PAGE_SIZE); + if (!ptr) { + pr_warning("PERCPU: failed to allocate " + "4k page for cpu%u\n", cpu); + goto enomem; + } + + memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); + pcpu4k_pages[j++] = virt_to_page(ptr); + } + + /* we're ready, commit */ + pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", + pcpu4k_nr_static_pages, static_size); + + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, + reserved_size, -1, + -1, NULL, populate_pte_fn); + goto out_free_ar; + +enomem: + while (--j >= 0) + free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); + ret = -ENOMEM; +out_free_ar: + free_bootmem(__pa(pcpu4k_pages), pages_size); + return ret; +} + /* * Generic percpu area setup. * -- cgit v1.2.3 From 8c4bfc6e8801616ab2e01c38140b2159b388d2ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize lpage first chunk allocator Generalize and move x86 setup_pcpu_lpage() into pcpu_lpage_first_chunk(). setup_pcpu_lpage() now is a simple wrapper around the generalized version. 
Other than taking size parameters and using arch supplied callbacks to allocate/free/map memory, pcpu_lpage_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, factor out pcpu_calc_fc_sizes() which is common to pcpu_embed_first_chunk() and pcpu_lpage_first_chunk(). [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/include/asm/percpu.h | 9 -- arch/x86/kernel/setup_percpu.c | 169 +++------------------------------ arch/x86/mm/pageattr.c | 1 + include/linux/percpu.h | 27 ++++++ mm/percpu.c | 209 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 244 insertions(+), 171 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 103f1ddb0d85..a18c038a3079 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -156,15 +156,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_NEED_MULTIPLE_NODES -void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ab896b31e80b..4f2e0ac9130b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -137,44 +137,21 @@ static void __init pcpu_fc_free(void *ptr, size_t size) } /* - * Large page remap allocator - * - * This allocator uses PMD page as unit. A PMD page is allocated for - * each cpu and each is remapped into vmalloc area using PMD mapping. - * As PMD page is quite large, only part of it is used for the first - * chunk. Unused part is returned to the bootmem allocator. - * - * So, the PMD pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more PMD TLB entry pressure but still is much - * better than only using 4k mappings while still being NUMA friendly. + * Large page remapping allocator */ #ifdef CONFIG_NEED_MULTIPLE_NODES -struct pcpul_ent { - unsigned int cpu; - void *ptr; -}; - -static size_t pcpul_size; -static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; - -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +static void __init pcpul_map(void *ptr, size_t size, void *addr) { - size_t off = (size_t)pageno << PAGE_SHIFT; + pmd_t *pmd, pmd_v; - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); + pmd = populate_extra_pmd((unsigned long)addr); + pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE); + set_pmd(pmd, pmd_v); } static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { - size_t map_size, dyn_size; - unsigned int cpu; - int i, j; - ssize_t ret; + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; @@ -198,134 +175,10 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -EINVAL; } - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. 
- */ - pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - if (pcpul_size > PMD_SIZE) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } - dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - - /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); - pcpul_map = alloc_bootmem(map_size); - - for_each_possible_cpu(cpu) { - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, - PMD_SIZE); - if (!pcpul_map[cpu].ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The 2MB up-rounding bootmem is needed to make - * sure the partial 2MB page is still fully RAM - it's - * not well-specified to have a PAT-incompatible area - * (unmapped RAM, device memory, etc.) in that hole. - */ - free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), - PMD_SIZE - pcpul_size); - - memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); - } - - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * PMD_SIZE; - vm_area_register_early(&pcpul_vm, PMD_SIZE); - - for_each_possible_cpu(cpu) { - pmd_t *pmd, pmd_v; - - pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + - cpu * PMD_SIZE); - pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), - PAGE_KERNEL_LARGE); - set_pmd(pmd, pmd_v); - } - - /* we're ready, commit */ - pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); - - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - return ret; - -enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. This is - * used by pageattr to detect VM aliases and break up the pcpu PMD - * mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used PMD - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); - unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; - int left = 0, right = num_possible_cpus() - 1; - int pos; - - /* pcpul in use at all? 
*/ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < pmd_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > pmd_addr) - right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * PMD_SIZE + offset; - } - } - - return NULL; + return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PMD_SIZE, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1b734d7a8966..c106f7852424 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 41b5bfab4195..9f6bfd7d4b92 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -62,6 +62,7 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); +typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, @@ -79,6 +80,32 @@ extern ssize_t __init pcpu_4k_first_chunk( pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); +#ifdef CONFIG_NEED_MULTIPLE_NODES +extern ssize_t __init pcpu_lpage_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn); + +extern void *pcpu_lpage_remapped(void *kaddr); +#else +static inline ssize_t __init pcpu_lpage_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) +{ + return -EINVAL; +} + +static inline void *pcpu_lpage_remapped(void *kaddr) +{ + return NULL; +} +#endif + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index f3fe7bc7378f..17db527ee2e2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1190,6 +1190,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, return pcpu_unit_size; } +static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep) +{ + size_t size_sum; + + size_sum = PFN_ALIGN(static_size + reserved_size + + (*dyn_sizep >= 0 ? *dyn_sizep : 0)); + if (*dyn_sizep != 0) + *dyn_sizep = size_sum - static_size - reserved_size; + + return size_sum; +} + /* * Embedding first chunk setup helper. */ @@ -1241,10 +1254,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, unsigned int cpu; /* determine parameters and allocate */ - pcpue_size = PFN_ALIGN(static_size + reserved_size + - (dyn_size >= 0 ? 
dyn_size : 0)); - if (dyn_size != 0) - dyn_size = pcpue_size - static_size - reserved_size; + pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); chunk_size = pcpue_unit_size * num_possible_cpus(); @@ -1390,6 +1400,197 @@ out_free_ar: return ret; } +/* + * Large page remapping first chunk setup helper + */ +#ifdef CONFIG_NEED_MULTIPLE_NODES +struct pcpul_ent { + unsigned int cpu; + void *ptr; +}; + +static size_t pcpul_size; +static size_t pcpul_unit_size; +static struct pcpul_ent *pcpul_map; +static struct vm_struct pcpul_vm; + +static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +{ + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpul_size) + return NULL; + + return virt_to_page(pcpul_map[cpu].ptr + off); +} + +/** + * pcpu_lpage_first_chunk - remap the first percpu chunk using large page + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @lpage_size: the size of a large page + * @alloc_fn: function to allocate percpu lpage, always called with lpage_size + * @free_fn: function to free percpu memory, @size <= lpage_size + * @map_fn: function to map percpu lpage, always called with lpage_size + * + * This allocator uses large page as unit. A large page is allocated + * for each cpu and each is remapped into vmalloc area using large + * page mapping. As large page can be quite large, only part of it is + * used for the first chunk. Unused part is returned to the bootmem + * allocator. + * + * So, the large pages are mapped twice - once to the physical mapping + * and to the vmalloc area for the first percpu chunk. The double + * mapping does add one more large TLB entry pressure but still is + * much better than only using 4k mappings while still being NUMA + * friendly. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) +{ + size_t size_sum; + size_t map_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + /* + * Currently supports only single page. Supporting multiple + * pages won't be too difficult if it ever becomes necessary. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + + pcpul_unit_size = lpage_size; + pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + if (pcpul_size > pcpul_unit_size) { + pr_warning("PERCPU: static data is larger than large page, " + "can't use large page\n"); + return -EINVAL; + } + + /* allocate pointer array and alloc large pages */ + map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + pcpul_map = alloc_bootmem(map_size); + + for_each_possible_cpu(cpu) { + void *ptr; + + ptr = alloc_fn(cpu, lpage_size); + if (!ptr) { + pr_warning("PERCPU: failed to allocate large page " + "for cpu%u\n", cpu); + goto enomem; + } + + /* + * Only use pcpul_size bytes and give back the rest. + * + * Ingo: The lpage_size up-rounding bootmem is needed + * to make sure the partial lpage is still fully RAM - + * it's not well-specified to have a incompatible area + * (unmapped RAM, device memory, etc.) in that hole. 
+ */ + free_fn(ptr + pcpul_size, lpage_size - pcpul_size); + + pcpul_map[cpu].cpu = cpu; + pcpul_map[cpu].ptr = ptr; + + memcpy(ptr, __per_cpu_load, static_size); + } + + /* allocate address and map */ + pcpul_vm.flags = VM_ALLOC; + pcpul_vm.size = num_possible_cpus() * pcpul_unit_size; + vm_area_register_early(&pcpul_vm, pcpul_unit_size); + + for_each_possible_cpu(cpu) + map_fn(pcpul_map[cpu].ptr, pcpul_unit_size, + pcpul_vm.addr + cpu * pcpul_unit_size); + + /* we're ready, commit */ + pr_info("PERCPU: Remapped at %p with large pages, static data " + "%zu bytes\n", pcpul_vm.addr, static_size); + + ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, + reserved_size, dyn_size, pcpul_unit_size, + pcpul_vm.addr, NULL); + + /* sort pcpul_map array for pcpu_lpage_remapped() */ + for (i = 0; i < num_possible_cpus() - 1; i++) + for (j = i + 1; j < num_possible_cpus(); j++) + if (pcpul_map[i].ptr > pcpul_map[j].ptr) { + struct pcpul_ent tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + return ret; + +enomem: + for_each_possible_cpu(cpu) + if (pcpul_map[cpu].ptr) + free_fn(pcpul_map[cpu].ptr, pcpul_size); + free_bootmem(__pa(pcpul_map), map_size); + return -ENOMEM; +} + +/** + * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area + * @kaddr: the kernel address in question + * + * Determine whether @kaddr falls in the pcpul recycled area. This is + * used by pageattr to detect VM aliases and break up the pcpu large + * page mapping such that the same physical page is not mapped under + * different attributes. + * + * The recycled area is always at the tail of a partially used large + * page. + * + * RETURNS: + * Address of corresponding remapped pcpu address if match is found; + * otherwise, NULL. + */ +void *pcpu_lpage_remapped(void *kaddr) +{ + unsigned long unit_mask = pcpul_unit_size - 1; + void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask); + unsigned long offset = (unsigned long)kaddr & unit_mask; + int left = 0, right = num_possible_cpus() - 1; + int pos; + + /* pcpul in use at all? */ + if (!pcpul_map) + return NULL; + + /* okay, perform binary search */ + while (left <= right) { + pos = (left + right) / 2; + + if (pcpul_map[pos].ptr < lpage_addr) + left = pos + 1; + else if (pcpul_map[pos].ptr > lpage_addr) + right = pos - 1; + else { + /* it shouldn't be in the area for the first chunk */ + WARN_ON(offset < pcpul_size); + + return pcpul_vm.addr + + pcpul_map[pos].cpu * pcpul_unit_size + offset; + } + } + + return NULL; +} +#endif + /* * Generic percpu area setup. * -- cgit v1.2.3 From 38a6be525460f52ac6f2de1c3f73c5615a8853cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: percpu: simplify pcpu_setup_first_chunk() Now that all first chunk allocator helpers allocate and map the first chunk themselves, there's no need to have optional default alloc/map in pcpu_setup_first_chunk(). Drop @populate_pte_fn and only leave @dyn_size optional and make all other params mandatory. This makes it much easier to follow what pcpu_setup_first_chunk() is doing and what actual differences tweaking each parameter results in. 
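Schematically, a caller now looks like this (a sketch with illustrative names and values; the sparc conversion in the diff below is a real instance):

/* the caller has already allocated the first chunk, mapped it at
 * base_addr and copied the static data before handing it over */
pcpu_unit_size = pcpu_setup_first_chunk(my_get_page,	/* page lookup */
					static_size,
					PERCPU_MODULE_RESERVE,
					-1,		/* auto dyn_size */
					MY_UNIT_SIZE,	/* multiple of PAGE_SIZE */
					base_addr);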
[ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/sparc/kernel/smp_64.c | 2 +- include/linux/percpu.h | 5 +-- mm/percpu.c | 104 +++++++++++++-------------------------------- 3 files changed, 33 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fa44eaf8d897..ccad7b20ae75 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1528,7 +1528,7 @@ void __init setup_per_cpu_areas(void) pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); + PCPU_CHUNK_SIZE, vm.addr); free_bootmem(__pa(pcpur_ptrs), ptrs_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f6bfd7d4b92..ec64357e1762 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -66,9 +66,8 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size, - void *base_addr, - pcpu_fc_populate_pte_fn_t populate_pte_fn); + ssize_t dyn_size, size_t unit_size, + void *base_addr); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, diff --git a/mm/percpu.c b/mm/percpu.c index 17db527ee2e2..21d938a10662 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -983,24 +983,22 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes - * @reserved_size: the size of reserved percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes, 0 for none * @dyn_size: free size for dynamic allocation in bytes, -1 for auto - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto - * @base_addr: mapped address, NULL for auto - * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE + * @base_addr: mapped address * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area - * setup path. The first two parameters are mandatory. The rest are - * optional. + * setup path. * * @get_page_fn() should return pointer to percpu page given cpu * number and page number. It should at least return enough pages to * cover the static area. The returned pages for static area should - * have been initialized with valid data. If @unit_size is specified, - * it can also return pages after the static area. NULL return - * indicates end of pages for the cpu. Note that @get_page_fn() must - * return the same number of pages for all cpus. + * have been initialized with valid data. It can also return pages + * after the static area. NULL return indicates end of pages for the + * cpu. Note that @get_page_fn() must return the same number of pages + * for all cpus. * * @reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves @@ -1015,17 +1013,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * non-negative value makes percpu leave alone the area beyond * @static_size + @reserved_size + @dyn_size. 
* - * @unit_size, if non-negative, specifies unit size and must be - * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @reserved_size + if non-negative, @dyn_size. - * - * Non-null @base_addr means that the caller already allocated virtual - * region for the first chunk and mapped it. percpu must not mess - * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL - * @populate_pte_fn doesn't make any sense. + * @unit_size specifies unit size and must be aligned to PAGE_SIZE and + * equal to or larger than @static_size + @reserved_size + if + * non-negative, @dyn_size. * - * @populate_pte_fn is used to populate the pagetable. NULL means the - * caller already populated the pagetable. + * The caller should have mapped the first chunk at @base_addr and + * copied static data to each unit. * * If the first chunk ends up with both reserved and dynamic areas, it * is served by two chunks - one to serve the core static and reserved @@ -1040,9 +1033,8 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size, - void *base_addr, - pcpu_fc_populate_pte_fn_t populate_pte_fn) + ssize_t dyn_size, size_t unit_size, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; @@ -1050,27 +1042,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, (dyn_size >= 0 ? dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; - int nr_pages; - int err, i; + int i, nr_pages; /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); - if (unit_size >= 0) { - BUG_ON(unit_size < size_sum); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); - } else - BUG_ON(base_addr); - BUG_ON(base_addr && populate_pte_fn); - - if (unit_size >= 0) - pcpu_unit_pages = unit_size >> PAGE_SHIFT; - else - pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(size_sum)); + BUG_ON(!base_addr); + BUG_ON(unit_size < size_sum); + BUG_ON(unit_size & ~PAGE_MASK); + BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) @@ -1079,6 +1062,10 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; + first_vm.flags = VM_ALLOC; + first_vm.size = pcpu_chunk_size; + first_vm.addr = base_addr; + /* * Allocate chunk slots. The additional last slot is for * empty chunks. 
@@ -1101,6 +1088,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); schunk->page = schunk->page_ar; + schunk->immutable = true; if (reserved_size) { schunk->free_size = reserved_size; @@ -1124,31 +1112,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); dchunk->page = schunk->page_ar; /* share page map with schunk */ + dchunk->immutable = true; dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; dchunk->map[dchunk->map_used++] = dchunk->free_size; } - /* allocate vm address */ - first_vm.flags = VM_ALLOC; - first_vm.size = pcpu_chunk_size; - - if (!base_addr) - vm_area_register_early(&first_vm, PAGE_SIZE); - else { - /* - * Pages already mapped. No need to remap into - * vmalloc area. In this case the first chunks can't - * be mapped or unmapped by percpu and are marked - * immutable. - */ - first_vm.addr = base_addr; - schunk->immutable = true; - if (dchunk) - dchunk->immutable = true; - } - /* assign pages */ nr_pages = -1; for_each_possible_cpu(cpu) { @@ -1168,19 +1138,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, BUG_ON(nr_pages != i); } - /* map them */ - if (populate_pte_fn) { - for_each_possible_cpu(cpu) - for (i = 0; i < nr_pages; i++) - populate_pte_fn(pcpu_chunk_addr(schunk, - cpu, i)); - - err = pcpu_map(schunk, 0, nr_pages); - if (err) - panic("failed to setup static percpu area, err=%d\n", - err); - } - /* link the first chunk in */ pcpu_first_chunk = dchunk ?: schunk; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -1282,7 +1239,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, return pcpu_setup_first_chunk(pcpue_get_page, static_size, reserved_size, dyn_size, - pcpue_unit_size, pcpue_ptr, NULL); + pcpue_unit_size, pcpue_ptr); } /* @@ -1387,8 +1344,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, reserved_size, -1, - pcpu4k_unit_pages << PAGE_SHIFT, vm.addr, - NULL); + pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); goto out_free_ar; enomem: @@ -1521,7 +1477,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, reserved_size, dyn_size, pcpul_unit_size, - pcpul_vm.addr, NULL); + pcpul_vm.addr); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From ce3141a277ff6cc37e51008b8888dc2cb7456ef1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: drop pcpu_chunk->page[] percpu core doesn't need to track all the allocated pages. It needs to know whether certain pages are populated and a way to reverse map address to page when freeing. This patch drops pcpu_chunk->page[] and uses a populated bitmap and vmalloc_to_page() lookup instead. Using vmalloc_to_page() exclusively is also possible but complicates first chunk handling, inflates cache footprint and prevents non-standard memory allocation for percpu memory. pcpu_chunk->page[] was used to track each page's allocation and allowed asymmetric population which happens during the failure path; however, with a single bitmap for all units, this is no longer possible.
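For illustration, the new bitmap is consumed through region iterators (pcpu_for_each_pop_region() and friends in the diff below) which walk maximal populated/unpopulated runs. A minimal userspace sketch of that scan, with a trivial linear search standing in for the kernel's find_next_bit()/find_next_zero_bit() (toy code, not part of the patch):

	#include <stdio.h>

	#define UNIT_PAGES 8

	/* toy stand-in for find_next_bit()/find_next_zero_bit() */
	static int next_with_val(const unsigned char *bm, int size, int off, int val)
	{
		while (off < size && bm[off] != val)
			off++;
		return off;
	}

	int main(void)
	{
		/* 1 = populated, 0 = unpopulated; models chunk->populated[] */
		unsigned char populated[UNIT_PAGES] = { 1, 1, 0, 0, 1, 0, 1, 1 };
		int rs, re;

		/* report maximal populated regions, one per iteration */
		for (rs = 0; rs < UNIT_PAGES; rs = re + 1) {
			rs = next_with_val(populated, UNIT_PAGES, rs, 1);
			re = next_with_val(populated, UNIT_PAGES, rs + 1, 0);
			if (rs < UNIT_PAGES)
				printf("populated region [%d,%d)\n", rs, re);
		}
		return 0;
	}

This prints [0,2), [4,5) and [6,8) for the example bitmap; (de)population then only has to operate on whole regions rather than on individual page pointers.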
Bite the bullet and rewrite (de)populate functions so that things are done in clearly separated steps such that asymmetric population doesn't happen. This makes the (de)population process much more modular and will also ease implementing non-standard memory usage in the future (e.g. large pages). This makes @get_page_fn parameter to pcpu_setup_first_chunk() unnecessary. The parameter is dropped and all first chunk helpers are updated accordingly. Please note that despite the volume most changes to first chunk helpers are symbol renames for variables which don't need to be referenced outside of the helper anymore. This change reduces memory usage and cache footprint of pcpu_chunk. Now only #unit_pages bits are necessary per chunk. [ Impact: reduced memory usage and cache footprint for bookkeeping ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 42 ++-- include/linux/percpu.h | 3 +- mm/percpu.c | 604 ++++++++++++++++++++++++++++----------------- 3 files changed, 400 insertions(+), 249 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index ccad7b20ae75..f2f22ee97a7a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1415,19 +1415,6 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } -static size_t pcpur_size __initdata; -static void **pcpur_ptrs __initdata; - -static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpur_size) - return NULL; - - return virt_to_page(pcpur_ptrs[cpu] + off); -} - #define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) static void __init pcpu_map_range(unsigned long start, unsigned long end, @@ -1491,25 +1478,26 @@ void __init setup_per_cpu_areas(void) size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; static struct vm_struct vm; unsigned long delta, cpu; - size_t pcpu_unit_size; + size_t size_sum, pcpu_unit_size; size_t ptrs_size; + void **ptrs; - pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; + size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); + dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; - ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); - pcpur_ptrs = alloc_bootmem(ptrs_size); + ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(ptrs[0])); + ptrs = alloc_bootmem(ptrs_size); for_each_possible_cpu(cpu) { - pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, - PCPU_CHUNK_SIZE); + ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, + PCPU_CHUNK_SIZE); - free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), - PCPU_CHUNK_SIZE - pcpur_size); + free_bootmem(__pa(ptrs[cpu] + size_sum), + PCPU_CHUNK_SIZE - size_sum); - memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); + memcpy(ptrs[cpu], __per_cpu_load, static_size); } /* allocate address and map */ @@ -1523,14 +1511,14 @@ void __init setup_per_cpu_areas(void) start += cpu * PCPU_CHUNK_SIZE; end = start + PCPU_CHUNK_SIZE; - pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu])); + pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, + pcpu_unit_size = pcpu_setup_first_chunk(static_size, PERCPU_MODULE_RESERVE, dyn_size, PCPU_CHUNK_SIZE, vm.addr); - free_bootmem(__pa(pcpur_ptrs), ptrs_size); + free_bootmem(__pa(ptrs), 
ptrs_size); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { diff --git a/include/linux/percpu.h b/include/linux/percpu.h index ec64357e1762..63c8b7a23e66 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -58,13 +58,12 @@ extern void *pcpu_base_addr; -typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); -extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, +extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, void *base_addr); diff --git a/mm/percpu.c b/mm/percpu.c index 639fce4d2caf..21756814d99f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -94,8 +94,7 @@ struct pcpu_chunk { int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ bool immutable; /* no [de]population allowed */ - struct page **page; /* points to page array */ - struct page *page_ar[]; /* #cpus * UNIT_PAGES */ + unsigned long populated[]; /* populated bitmap */ }; static int pcpu_unit_pages __read_mostly; @@ -129,9 +128,9 @@ static int pcpu_reserved_chunk_limit; * Synchronization rules. * * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former - * protects allocation/reclaim paths, chunks and chunk->page arrays. - * The latter is a spinlock and protects the index data structures - - * chunk slots, chunks and area maps in chunks. + * protects allocation/reclaim paths, chunks, populated bitmap and + * vmalloc mapping. The latter is a spinlock and protects the index + * data structures - chunk slots, chunks and area maps in chunks. * * During allocation, pcpu_alloc_mutex is kept locked all the time and * pcpu_lock is grabbed and released as necessary. All actual memory @@ -188,16 +187,13 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); } -static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, - unsigned int cpu, int page_idx) +static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) { - return &chunk->page[pcpu_page_idx(cpu, page_idx)]; -} + /* must not be used on pre-mapped chunk */ + WARN_ON(chunk->immutable); -static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, - int page_idx) -{ - return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; + return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); } /* set the pointer to a chunk in a page struct */ @@ -212,6 +208,34 @@ static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) return (struct pcpu_chunk *)page->index; } +static void pcpu_next_unpop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +{ + *rs = find_next_zero_bit(chunk->populated, end, *rs); + *re = find_next_bit(chunk->populated, end, *rs + 1); +} + +static void pcpu_next_pop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +{ + *rs = find_next_bit(chunk->populated, end, *rs); + *re = find_next_zero_bit(chunk->populated, end, *rs + 1); +} + +/* + * (Un)populated page region iterators. Iterate over (un)populated + * page regions betwen @start and @end in @chunk. @rs and @re should + * be integer variables and will be set to start and end page index of + * the current region. 
+ */ +#define pcpu_for_each_unpop_region(chunk, rs, re, start, end) \ + for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end))) + +#define pcpu_for_each_pop_region(chunk, rs, re, start, end) \ + for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) + /** * pcpu_mem_alloc - allocate memory * @size: bytes to allocate @@ -545,42 +569,197 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) } /** - * pcpu_unmap - unmap pages out of a pcpu_chunk + * pcpu_get_pages_and_bitmap - get temp pages array and bitmap + * @chunk: chunk of interest + * @bitmapp: output parameter for bitmap + * @may_alloc: may allocate the array + * + * Returns pointer to array of pointers to struct page and bitmap, + * both of which can be indexed with pcpu_page_idx(). The returned + * array is cleared to zero and *@bitmapp is copied from + * @chunk->populated. Note that there is only one array and bitmap + * and access exclusion is the caller's responsibility. + * + * CONTEXT: + * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. + * Otherwise, don't care. + * + * RETURNS: + * Pointer to temp pages array on success, NULL on failure. + */ +static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, + unsigned long **bitmapp, + bool may_alloc) +{ + static struct page **pages; + static unsigned long *bitmap; + size_t pages_size = num_possible_cpus() * pcpu_unit_pages * + sizeof(pages[0]); + size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * + sizeof(unsigned long); + + if (!pages || !bitmap) { + if (may_alloc && !pages) + pages = pcpu_mem_alloc(pages_size); + if (may_alloc && !bitmap) + bitmap = pcpu_mem_alloc(bitmap_size); + if (!pages || !bitmap) + return NULL; + } + + memset(pages, 0, pages_size); + bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); + + *bitmapp = bitmap; + return pages; +} + +/** + * pcpu_free_pages - free pages which were allocated for @chunk + * @chunk: chunk pages were allocated for + * @pages: array of pages to be freed, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be freed + * @page_end: page index of the last page to be freed + 1 + * + * Free pages [@page_start and @page_end) in @pages for all units. + * The pages were allocated for @chunk. + */ +static void pcpu_free_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page = pages[pcpu_page_idx(cpu, i)]; + + if (page) + __free_page(page); + } + } +} + +/** + * pcpu_alloc_pages - allocates pages for @chunk + * @chunk: target chunk + * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be allocated + * @page_end: page index of the last page to be allocated + 1 + * + * Allocate pages [@page_start,@page_end) into @pages for all units. + * The allocation is for @chunk. Percpu core doesn't care about the + * content of @pages and will pass it verbatim to pcpu_map_pages(). 
+ */ +static int pcpu_alloc_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; + + *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); + if (!*pagep) { + pcpu_free_pages(chunk, pages, populated, + page_start, page_end); + return -ENOMEM; + } + } + } + return 0; +} + +/** + * pcpu_pre_unmap_flush - flush cache prior to unmapping + * @chunk: chunk the regions to be flushed belongs to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages in [@page_start,@page_end) of @chunk are about to be + * unmapped. Flush cache. As each flushing trial can be very + * expensive, issue flush on the whole region at once rather than + * doing it for each cpu. This could be an overkill but is more + * scalable. + */ +static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + + flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); +} + +static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) +{ + unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); +} + +/** + * pcpu_unmap_pages - unmap pages out of a pcpu_chunk * @chunk: chunk of interest + * @pages: pages array which can be used to pass information to free + * @populated: populated bitmap * @page_start: page index of the first page to unmap * @page_end: page index of the last page to unmap + 1 - * @flush_tlb: whether to flush tlb or not * * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. - * If @flush is true, vcache is flushed before unmapping and tlb - * after. + * Corresponding elements in @pages were cleared by the caller and can + * be used to carry information to pcpu_free_pages() which will be + * called after all unmaps are finished. The caller should call + * proper pre/post flush functions. */ -static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, - bool flush_tlb) +static void pcpu_unmap_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; unsigned int cpu; + int i; - /* unmap must not be done on immutable chunk */ - WARN_ON(chunk->immutable); + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page; - /* - * Each flushing trial can be very expensive, issue flush on - * the whole region at once rather than doing it for each cpu. - * This could be an overkill but is more scalable. 
- */ - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + page = pcpu_chunk_page(chunk, cpu, i); + WARN_ON(!page); + pages[pcpu_page_idx(cpu, i)] = page; + } + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), + page_end - page_start); + } - for_each_possible_cpu(cpu) - unmap_kernel_range_noflush( - pcpu_chunk_addr(chunk, cpu, page_start), - (page_end - page_start) << PAGE_SHIFT); - - /* ditto as flush_cache_vunmap() */ - if (flush_tlb) - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + for (i = page_start; i < page_end; i++) + __clear_bit(i, populated); +} + +/** + * pcpu_post_unmap_tlb_flush - flush TLB after unmapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush + * TLB for the regions. This can be skipped if the area is to be + * returned to vmalloc as vmalloc will handle TLB flushing lazily. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + + flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -591,35 +770,76 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages, } /** - * pcpu_map - map pages into a pcpu_chunk + * pcpu_map_pages - map pages into a pcpu_chunk * @chunk: chunk of interest + * @pages: pages array containing pages to be mapped + * @populated: populated bitmap * @page_start: page index of the first page to map * @page_end: page index of the last page to map + 1 * - * For each cpu, map pages [@page_start,@page_end) into @chunk. - * vcache is flushed afterwards. + * For each cpu, map pages [@page_start,@page_end) into @chunk. The + * caller is responsible for calling pcpu_post_map_flush() after all + * mappings are complete. + * + * This function is responsible for setting corresponding bits in + * @chunk->populated bitmap and whatever is necessary for reverse + * lookup (addr -> chunk). 
*/ -static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) +static int pcpu_map_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - unsigned int cpu; - int err; - - /* map must not be done on immutable chunk */ - WARN_ON(chunk->immutable); + unsigned int cpu, tcpu; + int i, err; for_each_possible_cpu(cpu) { err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), - pcpu_chunk_pagep(chunk, cpu, page_start), + &pages[pcpu_page_idx(cpu, page_start)], page_end - page_start); if (err < 0) - return err; + goto err; } + /* mapping successful, link chunk and mark populated */ + for (i = page_start; i < page_end; i++) { + for_each_possible_cpu(cpu) + pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], + chunk); + __set_bit(i, populated); + } + + return 0; + +err: + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), + page_end - page_start); + } + return err; +} + +/** + * pcpu_post_map_flush - flush cache after mapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been mapped. Flush + * cache. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_map_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + /* flush at once, please read comments in pcpu_unmap() */ flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), pcpu_chunk_addr(chunk, last, page_end)); - return 0; } /** @@ -636,39 +856,45 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) * CONTEXT: * pcpu_alloc_mutex. */ -static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, - bool flush) +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) { int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); - int unmap_start = -1; - int uninitialized_var(unmap_end); - unsigned int cpu; - int i; + struct page **pages; + unsigned long *populated; + int rs, re; + + /* quick path, check whether it's empty already */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + if (rs == page_start && re == page_end) + return; + break; + } - for (i = page_start; i < page_end; i++) { - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + /* immutable chunks can't be depopulated */ + WARN_ON(chunk->immutable); - if (!*pagep) - continue; + /* + * If control reaches here, there must have been at least one + * successful population attempt so the temp pages array must + * be available now. + */ + pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); + BUG_ON(!pages); - __free_page(*pagep); + /* unmap and free */ + pcpu_pre_unmap_flush(chunk, page_start, page_end); - /* - * If it's partial depopulation, it might get - * populated or depopulated again. Mark the - * page gone. - */ - *pagep = NULL; + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); - unmap_start = unmap_start < 0 ? 
i : unmap_start; - unmap_end = i + 1; - } - } + /* no need to flush tlb, vmalloc will handle it lazily */ + + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_free_pages(chunk, pages, populated, rs, re); - if (unmap_start >= 0) - pcpu_unmap(chunk, unmap_start, unmap_end, flush); + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); } /** @@ -685,50 +911,61 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) { - const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); - int map_start = -1; - int uninitialized_var(map_end); + int free_end = page_start, unmap_end = page_start; + struct page **pages; + unsigned long *populated; unsigned int cpu; - int i; + int rs, re, rc; - for (i = page_start; i < page_end; i++) { - if (pcpu_chunk_page_occupied(chunk, i)) { - if (map_start >= 0) { - if (pcpu_map(chunk, map_start, map_end)) - goto err; - map_start = -1; - } - continue; - } + /* quick path, check whether all pages are already there */ + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { + if (rs == page_start && re == page_end) + goto clear; + break; + } - map_start = map_start < 0 ? i : map_start; - map_end = i + 1; + /* need to allocate and map pages, this chunk can't be immutable */ + WARN_ON(chunk->immutable); - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); + if (!pages) + return -ENOMEM; - *pagep = alloc_pages_node(cpu_to_node(cpu), - alloc_mask, 0); - if (!*pagep) - goto err; - pcpu_set_page_chunk(*pagep, chunk); - } + /* alloc and map */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_free; + free_end = re; } - if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) - goto err; + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_map_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_unmap; + unmap_end = re; + } + pcpu_post_map_flush(chunk, page_start, page_end); + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); +clear: for_each_possible_cpu(cpu) memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, size); - return 0; -err: - /* likely under heavy memory pressure, give memory back */ - pcpu_depopulate_chunk(chunk, off, size, true); - return -ENOMEM; + +err_unmap: + pcpu_pre_unmap_flush(chunk, page_start, unmap_end); + pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); + pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); +err_free: + pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) + pcpu_free_pages(chunk, pages, populated, rs, re); + return rc; } static void free_pcpu_chunk(struct pcpu_chunk *chunk) @@ -752,7 +989,6 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); chunk->map_alloc = PCPU_DFL_MAP_ALLOC; chunk->map[chunk->map_used++] = pcpu_unit_size; - chunk->page = chunk->page_ar; chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); if (!chunk->vm) { @@ -933,7 +1169,7 @@ static void pcpu_reclaim(struct work_struct *work) mutex_unlock(&pcpu_alloc_mutex); list_for_each_entry_safe(chunk, next, &todo, list) { 
- pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size); free_pcpu_chunk(chunk); } } @@ -981,7 +1217,6 @@ EXPORT_SYMBOL_GPL(free_percpu); /** * pcpu_setup_first_chunk - initialize the first percpu chunk - * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes, 0 for none * @dyn_size: free size for dynamic allocation in bytes, -1 for auto @@ -992,14 +1227,6 @@ EXPORT_SYMBOL_GPL(free_percpu); * perpcu area. This function is to be called from arch percpu area * setup path. * - * @get_page_fn() should return pointer to percpu page given cpu - * number and page number. It should at least return enough pages to - * cover the static area. The returned pages for static area should - * have been initialized with valid data. It can also return pages - * after the static area. NULL return indicates end of pages for the - * cpu. Note that @get_page_fn() must return the same number of pages - * for all cpus. - * * @reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved @@ -1031,8 +1258,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * The determined pcpu_unit_size which can be used to initialize * percpu access. */ -size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t reserved_size, +size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, void *base_addr) { @@ -1041,8 +1267,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t size_sum = static_size + reserved_size + (dyn_size >= 0 ? 
dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; - unsigned int cpu; - int i, nr_pages; + int i; /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || @@ -1056,8 +1281,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; - pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) - + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); + pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; @@ -1087,8 +1312,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->vm = &first_vm; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); - schunk->page = schunk->page_ar; schunk->immutable = true; + bitmap_fill(schunk->populated, pcpu_unit_pages); if (reserved_size) { schunk->free_size = reserved_size; @@ -1106,38 +1331,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, /* init dynamic chunk if necessary */ if (dyn_size) { - dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); + dchunk = alloc_bootmem(pcpu_chunk_struct_size); INIT_LIST_HEAD(&dchunk->list); dchunk->vm = &first_vm; dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); - dchunk->page = schunk->page_ar; /* share page map with schunk */ dchunk->immutable = true; + bitmap_fill(dchunk->populated, pcpu_unit_pages); dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; dchunk->map[dchunk->map_used++] = dchunk->free_size; } - /* assign pages */ - nr_pages = -1; - for_each_possible_cpu(cpu) { - for (i = 0; i < pcpu_unit_pages; i++) { - struct page *page = get_page_fn(cpu, i); - - if (!page) - break; - *pcpu_chunk_pagep(schunk, cpu, i) = page; - } - - BUG_ON(i < PFN_UP(static_size)); - - if (nr_pages < 0) - nr_pages = i; - else - BUG_ON(nr_pages != i); - } - /* link the first chunk in */ pcpu_first_chunk = dchunk ?: schunk; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -1160,23 +1366,6 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, return size_sum; } -/* - * Embedding first chunk setup helper. 
- */ -static void *pcpue_ptr __initdata; -static size_t pcpue_size __initdata; -static size_t pcpue_unit_size __initdata; - -static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpue_size) - return NULL; - - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); -} - /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @static_size: the size of static percpu area in bytes @@ -1207,18 +1396,19 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size) { - size_t chunk_size; + size_t size_sum, unit_size, chunk_size; + void *base; unsigned int cpu; /* determine parameters and allocate */ - pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - chunk_size = pcpue_unit_size * num_possible_cpus(); + unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + chunk_size = unit_size * num_possible_cpus(); - pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!pcpue_ptr) { + base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS)); + if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); return -ENOMEM; @@ -1226,33 +1416,18 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, /* return the leftover and copy */ for_each_possible_cpu(cpu) { - void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + void *ptr = base + cpu * unit_size; - free_bootmem(__pa(ptr + pcpue_size), - pcpue_unit_size - pcpue_size); + free_bootmem(__pa(ptr + size_sum), unit_size - size_sum); memcpy(ptr, __per_cpu_load, static_size); } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); + size_sum >> PAGE_SHIFT, base, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, - reserved_size, dyn_size, - pcpue_unit_size, pcpue_ptr); -} - -/* - * 4k page first chunk setup helper. 
- */ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_unit_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_unit_pages) - return pcpu4k_pages[cpu * pcpu4k_unit_pages + pageno]; - return NULL; + return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + unit_size, base); } /** @@ -1279,23 +1454,25 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + int unit_pages; size_t pages_size; + struct page **pages; unsigned int cpu; int i, j; ssize_t ret; - pcpu4k_unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, - PCPU_MIN_UNIT_SIZE)); + unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, + PCPU_MIN_UNIT_SIZE)); /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_unit_pages * num_possible_cpus() * - sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); + pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * + sizeof(pages[0])); + pages = alloc_bootmem(pages_size); /* allocate pages */ j = 0; for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_unit_pages; i++) { + for (i = 0; i < unit_pages; i++) { void *ptr; ptr = alloc_fn(cpu, PAGE_SIZE); @@ -1304,25 +1481,24 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, "4k page for cpu%u\n", cpu); goto enomem; } - pcpu4k_pages[j++] = virt_to_page(ptr); + pages[j++] = virt_to_page(ptr); } /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = num_possible_cpus() * pcpu4k_unit_pages << PAGE_SHIFT; + vm.size = num_possible_cpus() * unit_pages << PAGE_SHIFT; vm_area_register_early(&vm, PAGE_SIZE); for_each_possible_cpu(cpu) { unsigned long unit_addr = (unsigned long)vm.addr + - (cpu * pcpu4k_unit_pages << PAGE_SHIFT); + (cpu * unit_pages << PAGE_SHIFT); - for (i = 0; i < pcpu4k_unit_pages; i++) + for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, - &pcpu4k_pages[cpu * pcpu4k_unit_pages], - pcpu4k_unit_pages); + ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], + unit_pages); if (ret < 0) panic("failed to map percpu area, err=%zd\n", ret); @@ -1340,19 +1516,18 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, /* we're ready, commit */ pr_info("PERCPU: %d 4k pages per cpu, static data %zu bytes\n", - pcpu4k_unit_pages, static_size); + unit_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - reserved_size, -1, - pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); + ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, + unit_pages << PAGE_SHIFT, vm.addr); goto out_free_ar; enomem: while (--j >= 0) - free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); + free_fn(page_address(pages[j]), PAGE_SIZE); ret = -ENOMEM; out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); + free_bootmem(__pa(pages), pages_size); return ret; } @@ -1370,16 +1545,6 @@ static size_t pcpul_unit_size; static struct pcpul_ent *pcpul_map; static struct vm_struct pcpul_vm; -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); -} - /** * pcpu_lpage_first_chunk - remap the first percpu chunk 
using large page * @static_size: the size of static percpu area in bytes @@ -1475,9 +1640,8 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", pcpul_vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - reserved_size, dyn_size, pcpul_unit_size, - pcpul_vm.addr); + ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + pcpul_unit_size, pcpul_vm.addr); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From 2f39e637ea240efb74cf807d31c93a71a0b89174 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: allow non-linear / sparse cpu -> unit mapping Currently cpu and unit are always identity mapped. To allow more efficient large page support on NUMA and lazy allocation for possible but offline cpus, cpu -> unit mapping needs to be non-linear and/or sparse. This can be easily implemented by adding a cpu -> unit mapping array and using it whenever looking up the matching unit for a cpu. The only unusual conversion is in pcpu_chunk_addr_search(). The passed-in address is unit0-based and unit0 might not be in use so it needs to be converted to the address of an in-use unit. This is easily done by adding the unit offset for the current processor. [ Impact: allows non-linear/sparse cpu -> unit mapping, no visible change yet ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 2 +- include/linux/percpu.h | 3 +- mm/percpu.c | 129 +++++++++++++++++++++++++++++++------------ 3 files changed, 97 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f2f22ee97a7a..6970333b48b8 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1516,7 +1516,7 @@ void __init setup_per_cpu_areas(void) pcpu_unit_size = pcpu_setup_first_chunk(static_size, PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr); + PCPU_CHUNK_SIZE, vm.addr, NULL); free_bootmem(__pa(ptrs), ptrs_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 63c8b7a23e66..1e0e8878dc2a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -57,6 +57,7 @@ #endif extern void *pcpu_base_addr; +extern const int *pcpu_unit_map; typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); @@ -66,7 +67,7 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, - void *base_addr); + void *base_addr, const int *unit_map); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, diff --git a/mm/percpu.c b/mm/percpu.c index 21756814d99f..2196fae24f00 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -8,12 +8,13 @@ * * This is percpu allocator which can handle both static and dynamic * areas. Percpu areas are allocated in chunks in vmalloc area. Each - * chunk is consisted of num_possible_cpus() units and the first chunk - * is used for static percpu variables in the kernel image (special - * boot time alloc/init handling necessary as these areas need to be - * brought up before allocation services are running). Unit grows as - * necessary and all units grow or shrink in unison.
When a chunk is - * filled up, another chunk is allocated. ie. in vmalloc area + * chunk is consisted of boot-time determined number of units and the + * first chunk is used for static percpu variables in the kernel image + * (special boot time alloc/init handling necessary as these areas + * need to be brought up before allocation services are running). + * Unit grows as necessary and all units grow or shrink in unison. + * When a chunk is filled up, another chunk is allocated. ie. in + * vmalloc area * * c0 c1 c2 * ------------------- ------------------- ------------ @@ -22,11 +23,13 @@ * * Allocation is done in offset-size areas of single unit space. Ie, * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, - * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring - * percpu base registers pcpu_unit_size apart. + * c1:u1, c1:u2 and c1:u3. On UMA, units corresponds directly to + * cpus. On NUMA, the mapping can be non-linear and even sparse. + * Percpu access can be done by configuring percpu base registers + * according to cpu to unit mapping and pcpu_unit_size. * - * There are usually many small percpu allocations many of them as - * small as 4 bytes. The allocator organizes chunks into lists + * There are usually many small percpu allocations many of them being + * as small as 4 bytes. The allocator organizes chunks into lists * according to free size and tries to allocate from the fullest one. * Each chunk keeps the maximum contiguous area size hint which is * guaranteed to be eqaul to or larger than the maximum contiguous @@ -99,14 +102,22 @@ struct pcpu_chunk { static int pcpu_unit_pages __read_mostly; static int pcpu_unit_size __read_mostly; +static int pcpu_nr_units __read_mostly; static int pcpu_chunk_size __read_mostly; static int pcpu_nr_slots __read_mostly; static size_t pcpu_chunk_struct_size __read_mostly; +/* cpus with the lowest and highest unit numbers */ +static unsigned int pcpu_first_unit_cpu __read_mostly; +static unsigned int pcpu_last_unit_cpu __read_mostly; + /* the address of the first chunk which starts with the kernel static area */ void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); +/* cpu -> unit map */ +const int *pcpu_unit_map __read_mostly; + /* * The first chunk which always exists. Note that unlike other * chunks, this one can be allocated and mapped in several different @@ -177,7 +188,7 @@ static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) static int pcpu_page_idx(unsigned int cpu, int page_idx) { - return cpu * pcpu_unit_pages + page_idx; + return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; } static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, @@ -321,6 +332,14 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) return pcpu_first_chunk; } + /* + * The address is relative to unit0 which might be unused and + * thus unmapped. Offset the address to the unit space of the + * current processor before looking it up in the vmalloc + * space. Note that any possible cpu id can be used here, so + * there's no need to worry about preemption or cpu hotplug. 
+ */ + addr += pcpu_unit_map[smp_processor_id()] * pcpu_unit_size; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } @@ -593,8 +612,7 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, { static struct page **pages; static unsigned long *bitmap; - size_t pages_size = num_possible_cpus() * pcpu_unit_pages * - sizeof(pages[0]); + size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); @@ -692,10 +710,9 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_cache_vunmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) @@ -756,10 +773,9 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk, static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_tlb_kernel_range( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -835,11 +851,9 @@ err: static void pcpu_post_map_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - /* flush at once, please read comments in pcpu_unmap() */ - flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_cache_vmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } /** @@ -953,8 +967,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) bitmap_copy(chunk->populated, populated, pcpu_unit_pages); clear: for_each_possible_cpu(cpu) - memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, - size); + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); return 0; err_unmap: @@ -1088,6 +1101,7 @@ area_found: mutex_unlock(&pcpu_alloc_mutex); + /* return address relative to unit0 */ return __addr_to_pcpu_ptr(chunk->vm->addr + off); fail_unlock: @@ -1222,6 +1236,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE * @base_addr: mapped address + * @unit_map: cpu -> unit map, NULL for sequential mapping * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area @@ -1260,16 +1275,17 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, - void *base_addr) + void *base_addr, const int *unit_map) { static struct vm_struct first_vm; static int smap[2], dmap[2]; size_t size_sum = static_size + reserved_size + (dyn_size >= 0 ? 
dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; + unsigned int cpu, tcpu; int i; - /* santiy checks */ + /* sanity checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); @@ -1278,9 +1294,52 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, BUG_ON(unit_size & ~PAGE_MASK); BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + /* determine number of units and verify and initialize pcpu_unit_map */ + if (unit_map) { + int first_unit = INT_MAX, last_unit = INT_MIN; + + for_each_possible_cpu(cpu) { + int unit = unit_map[cpu]; + + BUG_ON(unit < 0); + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + /* the mapping should be one-to-one */ + BUG_ON(unit_map[tcpu] == unit); + } + + if (unit < first_unit) { + pcpu_first_unit_cpu = cpu; + first_unit = unit; + } + if (unit > last_unit) { + pcpu_last_unit_cpu = cpu; + last_unit = unit; + } + } + pcpu_nr_units = last_unit + 1; + pcpu_unit_map = unit_map; + } else { + int *identity_map; + + /* #units == #cpus, identity mapped */ + identity_map = alloc_bootmem(num_possible_cpus() * + sizeof(identity_map[0])); + + for_each_possible_cpu(cpu) + identity_map[cpu] = cpu; + + pcpu_first_unit_cpu = 0; + pcpu_last_unit_cpu = pcpu_nr_units - 1; + pcpu_nr_units = num_possible_cpus(); + pcpu_unit_map = identity_map; + } + + /* determine basic parameters */ pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; - pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; + pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); @@ -1349,7 +1408,7 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, pcpu_chunk_relocate(pcpu_first_chunk, -1); /* we're done */ - pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); + pcpu_base_addr = schunk->vm->addr; return pcpu_unit_size; } @@ -1427,7 +1486,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, size_sum >> PAGE_SHIFT, base, static_size); return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, base); + unit_size, base, NULL); } /** @@ -1519,7 +1578,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, unit_pages, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, - unit_pages << PAGE_SHIFT, vm.addr); + unit_pages << PAGE_SHIFT, vm.addr, NULL); goto out_free_ar; enomem: @@ -1641,7 +1700,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, "%zu bytes\n", pcpul_vm.addr, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - pcpul_unit_size, pcpul_vm.addr); + pcpul_unit_size, pcpul_vm.addr, NULL); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From a530b7958612bafe2027e21359083dba84f0b3b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: teach large page allocator about NUMA The large page first chunk allocator is primarily used for NUMA machines; however, its NUMA handling is extremely simplistic. Regardless of their proximity, each cpu is put into a separate large page just to return most of the allocated space back, wasting a large amount of vmalloc space and increasing cache footprint. This patch teaches NUMA details to the large page allocator.
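The core of the new approach is a distance-based grouping pass followed by group-by-group unit assignment. A toy userspace model of just those two steps (made-up two-node topology; the real pcpu_lpage_build_unit_map() in the diff below additionally chooses units-per-large-page and pads each group accordingly):

	#include <stdio.h>

	#define NR_CPUS		4
	#define LOCAL_DISTANCE	10
	#define REMOTE_DISTANCE	20

	/* hypothetical topology: cpus 0,1 on node 0, cpus 2,3 on node 1 */
	static const int cpu_node[NR_CPUS] = { 0, 0, 1, 1 };

	static int cpu_distance(int from, int to)
	{
		return cpu_node[from] == cpu_node[to] ? LOCAL_DISTANCE
						      : REMOTE_DISTANCE;
	}

	int main(void)
	{
		int group_map[NR_CPUS] = { 0 };
		int unit_map[NR_CPUS];
		int cpu, tcpu, group, unit = 0, nr_groups = 0;

		/* put cpus which are LOCAL_DISTANCE both ways into one group */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			for (tcpu = 0; tcpu < cpu; tcpu++)
				if (cpu_distance(cpu, tcpu) == LOCAL_DISTANCE &&
				    cpu_distance(tcpu, cpu) == LOCAL_DISTANCE) {
					group_map[cpu] = group_map[tcpu];
					goto grouped;
				}
			group_map[cpu] = nr_groups++;
	grouped:
			;
		}

		/* hand out unit numbers group by group so that cpus sharing
		 * a node (and thus a large page) get adjacent units */
		for (group = 0; group < nr_groups; group++)
			for (cpu = 0; cpu < NR_CPUS; cpu++)
				if (group_map[cpu] == group)
					unit_map[cpu] = unit++;

		for (cpu = 0; cpu < NR_CPUS; cpu++)
			printf("cpu%d -> unit%d (node %d)\n",
			       cpu, unit_map[cpu], cpu_node[cpu]);
		return 0;
	}

With adjacent units backed by the same large page, the two nodes in this model need one large page each instead of one per cpu.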
Given processor proximity information, pcpu_lpage_build_unit_map() will find fitting cpu -> unit mapping in which cpus in LOCAL_DISTANCE share the same large page and not too much virtual address space is wasted. This greatly reduces the unit and thus chunk size and wastes much less address space for the first chunk. For example, on 4/4 NUMA machine, the original code occupied 16MB of virtual space for the first chunk while the new code only uses 4MB - one 2MB page for each node. [ Impact: much better space efficiency on NUMA machines ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Jan Beulich Cc: Andi Kleen Cc: David Miller --- arch/x86/kernel/setup_percpu.c | 72 +++++++-- include/linux/percpu.h | 24 ++- mm/percpu.c | 358 ++++++++++++++++++++++++++++++++--------- 3 files changed, 359 insertions(+), 95 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4f2e0ac9130b..7501bb14bd51 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -149,36 +149,73 @@ static void __init pcpul_map(void *ptr, size_t size, void *addr) set_pmd(pmd, pmd_v); } +static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) +{ + if (early_cpu_to_node(from) == early_cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; + size_t unit_map_size, unit_size; + int *unit_map; + int nr_units; + ssize_t ret; + + /* on non-NUMA, embedding is better */ + if (!chosen && !pcpu_need_numa()) + return -EINVAL; + + /* need PSE */ + if (!cpu_has_pse) { + pr_warning("PERCPU: lpage allocator requires PSE\n"); + return -EINVAL; + } + /* allocate and build unit_map */ + unit_map_size = num_possible_cpus() * sizeof(int); + unit_map = alloc_bootmem_nopanic(unit_map_size); + if (!unit_map) { + pr_warning("PERCPU: failed to allocate unit_map\n"); + return -ENOMEM; + } + + ret = pcpu_lpage_build_unit_map(static_size, + PERCPU_FIRST_CHUNK_RESERVE, + &dyn_size, &unit_size, PMD_SIZE, + unit_map, pcpu_lpage_cpu_distance); + if (ret < 0) { + pr_warning("PERCPU: failed to build unit_map\n"); + goto out_free; + } + nr_units = ret; + + /* do the parameters look okay? 
*/ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = num_possible_cpus() * PMD_SIZE; - - /* on non-NUMA, embedding is better */ - if (!pcpu_need_numa()) - return -EINVAL; + size_t tot_size = nr_units * unit_size; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - return -EINVAL; + ret = -EINVAL; + goto out_free; } } - /* need PSE */ - if (!cpu_has_pse) { - pr_warning("PERCPU: lpage allocator requires PSE\n"); - return -EINVAL; - } - - return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, - PMD_SIZE, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + dyn_size, unit_size, PMD_SIZE, + unit_map, nr_units, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); +out_free: + if (ret < 0) + free_bootmem(__pa(unit_map), unit_map_size); + return ret; } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) @@ -299,7 +336,8 @@ void __init setup_per_cpu_areas(void) /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; + per_cpu_offset(cpu) = + delta + pcpu_unit_map[cpu] * pcpu_unit_size; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1e0e8878dc2a..8ce91af4aa19 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -62,6 +62,7 @@ extern const int *pcpu_unit_map; typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); +typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( @@ -80,18 +81,37 @@ extern ssize_t __init pcpu_4k_first_chunk( pcpu_fc_populate_pte_fn_t populate_pte_fn); #ifdef CONFIG_NEED_MULTIPLE_NODES +extern int __init pcpu_lpage_build_unit_map( + size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn); + extern ssize_t __init pcpu_lpage_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); extern void *pcpu_lpage_remapped(void *kaddr); #else +static inline int pcpu_lpage_build_unit_map( + size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + return -EINVAL; +} + static inline ssize_t __init pcpu_lpage_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) diff --git a/mm/percpu.c b/mm/percpu.c index 2196fae24f00..b3d0bcff8c7c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -59,6 +59,7 @@ #include #include #include 
+#include #include #include #include @@ -1594,75 +1595,259 @@ out_free_ar: * Large page remapping first chunk setup helper */ #ifdef CONFIG_NEED_MULTIPLE_NODES + +/** + * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_sizep: in/out parameter for dynamic size, -1 for auto + * @unit_sizep: out parameter for unit size + * @unit_map: unit_map to be filled + * @cpu_distance_fn: callback to determine distance between cpus + * + * This function builds cpu -> unit map and determine other parameters + * considering needed percpu size, large page size and distances + * between CPUs in NUMA. + * + * CPUs which are of LOCAL_DISTANCE both ways are grouped together and + * may share units in the same large page. The returned configuration + * is guaranteed to have CPUs on different nodes on different large + * pages and >=75% usage of allocated virtual address space. + * + * RETURNS: + * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and + * returns the number of units to be allocated. -errno on failure. + */ +int __init pcpu_lpage_build_unit_map(size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + static int group_map[NR_CPUS] __initdata; + static int group_cnt[NR_CPUS] __initdata; + int group_cnt_max = 0; + size_t size_sum, min_unit_size, alloc_size; + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ + int last_allocs; + unsigned int cpu, tcpu; + int group, unit; + + /* + * Determine min_unit_size, alloc_size and max_upa such that + * alloc_size is multiple of lpage_size and is the smallest + * which can accomodate 4k aligned segments which are equal to + * or larger than min_unit_size. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + + alloc_size = roundup(min_unit_size, lpage_size); + upa = alloc_size / min_unit_size; + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + upa--; + max_upa = upa; + + /* group cpus according to their proximity */ + for_each_possible_cpu(cpu) { + group = 0; + next_group: + for_each_possible_cpu(tcpu) { + if (cpu == tcpu) + break; + if (group_map[tcpu] == group && + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { + group++; + goto next_group; + } + } + group_map[cpu] = group; + group_cnt[group]++; + group_cnt_max = max(group_cnt_max, group_cnt[group]); + } + + /* + * Expand unit size until address space usage goes over 75% + * and then as much as possible without using more address + * space. + */ + last_allocs = INT_MAX; + for (upa = max_upa; upa; upa--) { + int allocs = 0, wasted = 0; + + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + continue; + + for (group = 0; group_cnt[group]; group++) { + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); + allocs += this_allocs; + wasted += this_allocs * upa - group_cnt[group]; + } + + /* + * Don't accept if wastage is over 25%. The + * greater-than comparison ensures upa==1 always + * passes the following check. 
+ */ + if (wasted > num_possible_cpus() / 3) + continue; + + /* and then don't consume more memory */ + if (allocs > last_allocs) + break; + last_allocs = allocs; + best_upa = upa; + } + *unit_sizep = alloc_size / best_upa; + + /* assign units to cpus accordingly */ + unit = 0; + for (group = 0; group_cnt[group]; group++) { + for_each_possible_cpu(cpu) + if (group_map[cpu] == group) + unit_map[cpu] = unit++; + unit = roundup(unit, best_upa); + } + + return unit; /* unit contains aligned number of units */ +} + struct pcpul_ent { - unsigned int cpu; void *ptr; + void *map_addr; }; static size_t pcpul_size; -static size_t pcpul_unit_size; +static size_t pcpul_lpage_size; +static int pcpul_nr_lpages; static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; + +static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, + unsigned int *cpup) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + if (unit_map[cpu] == unit) { + if (cpup) + *cpup = cpu; + return true; + } + + return false; +} + +static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units) +{ + int width = 1, v = nr_units; + char empty_str[] = "--------"; + int upl, lpl; /* units per lpage, lpage per line */ + unsigned int cpu; + int lpage, unit; + + while (v /= 10) + width++; + empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + + upl = max_t(int, lpage_size / unit_size, 1); + lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + + printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, + static_size, reserved_size, dyn_size, unit_size, lpage_size); + + for (lpage = 0, unit = 0; unit < nr_units; unit++) { + if (!(unit % upl)) { + if (!(lpage++ % lpl)) { + printk("\n"); + printk("%spcpu-lpage: ", lvl); + } else + printk("| "); + } + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + printk("%0*d ", width, cpu); + else + printk("%s ", empty_str); + } + printk("\n"); +} /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes + * @unit_size: unit size in bytes * @lpage_size: the size of a large page + * @unit_map: cpu -> unit mapping + * @nr_units: the number of units * @alloc_fn: function to allocate percpu lpage, always called with lpage_size * @free_fn: function to free percpu memory, @size <= lpage_size * @map_fn: function to map percpu lpage, always called with lpage_size * - * This allocator uses large page as unit. A large page is allocated - * for each cpu and each is remapped into vmalloc area using large - * page mapping. As large page can be quite large, only part of it is - * used for the first chunk. Unused part is returned to the bootmem - * allocator. - * - * So, the large pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more large TLB entry pressure but still is - * much better than only using 4k mappings while still being NUMA - * friendly. + * This allocator uses large page to build and map the first chunk. + * Unlike other helpers, the caller should always specify @dyn_size + * and @unit_size. 
These parameters along with @unit_map and + * @nr_units can be determined using pcpu_lpage_build_unit_map(). + * This two stage initialization is to allow arch code to evaluate the + * parameters before committing to it. + * + * Large pages are allocated as directed by @unit_map and other + * parameters and mapped to vmalloc space. Unused holes are returned + * to the page allocator. Note that these holes end up being actively + * mapped twice - once to the physical mapping and to the vmalloc area + * for the first percpu chunk. Depending on architecture, this might + * cause problem when changing page attributes of the returned area. + * These double mapped areas can be detected using + * pcpu_lpage_remapped(). * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { - size_t size_sum; + static struct vm_struct vm; + size_t chunk_size = unit_size * nr_units; size_t map_size; unsigned int cpu; - int i, j; ssize_t ret; + int i, j, unit; - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. - */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, + unit_size, lpage_size, unit_map, nr_units); - pcpul_unit_size = lpage_size; - pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - if (pcpul_size > pcpul_unit_size) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } + BUG_ON(chunk_size % lpage_size); + + pcpul_size = static_size + reserved_size + dyn_size; + pcpul_lpage_size = lpage_size; + pcpul_nr_lpages = chunk_size / lpage_size; /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]); pcpul_map = alloc_bootmem(map_size); - for_each_possible_cpu(cpu) { + /* allocate all pages */ + for (i = 0; i < pcpul_nr_lpages; i++) { + size_t offset = i * lpage_size; + int first_unit = offset / unit_size; + int last_unit = (offset + lpage_size - 1) / unit_size; void *ptr; + /* find out which cpu is mapped to this unit */ + for (unit = first_unit; unit <= last_unit; unit++) + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + goto found; + continue; + found: ptr = alloc_fn(cpu, lpage_size); if (!ptr) { pr_warning("PERCPU: failed to allocate large page " @@ -1670,53 +1855,79 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, goto enomem; } - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The lpage_size up-rounding bootmem is needed - * to make sure the partial lpage is still fully RAM - - * it's not well-specified to have a incompatible area - * (unmapped RAM, device memory, etc.) in that hole. 
- */ - free_fn(ptr + pcpul_size, lpage_size - pcpul_size); - - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = ptr; + pcpul_map[i].ptr = ptr; + } - memcpy(ptr, __per_cpu_load, static_size); + /* return unused holes */ + for (unit = 0; unit < nr_units; unit++) { + size_t start = unit * unit_size; + size_t end = start + unit_size; + size_t off, next; + + /* don't free used part of occupied unit */ + if (pcpul_unit_to_cpu(unit, unit_map, NULL)) + start += pcpul_size; + + /* unit can span more than one page, punch the holes */ + for (off = start; off < end; off = next) { + void *ptr = pcpul_map[off / lpage_size].ptr; + next = min(roundup(off + 1, lpage_size), end); + if (ptr) + free_fn(ptr + off % lpage_size, next - off); + } } - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * pcpul_unit_size; - vm_area_register_early(&pcpul_vm, pcpul_unit_size); + /* allocate address, map and copy */ + vm.flags = VM_ALLOC; + vm.size = chunk_size; + vm_area_register_early(&vm, unit_size); + + for (i = 0; i < pcpul_nr_lpages; i++) { + if (!pcpul_map[i].ptr) + continue; + pcpul_map[i].map_addr = vm.addr + i * lpage_size; + map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr); + } for_each_possible_cpu(cpu) - map_fn(pcpul_map[cpu].ptr, pcpul_unit_size, - pcpul_vm.addr + cpu * pcpul_unit_size); + memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, + static_size); /* we're ready, commit */ pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); + "%zu bytes\n", vm.addr, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - pcpul_unit_size, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } + unit_size, vm.addr, unit_map); + + /* + * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped + * lpages are pushed to the end and trimmed. + */ + for (i = 0; i < pcpul_nr_lpages - 1; i++) + for (j = i + 1; j < pcpul_nr_lpages; j++) { + struct pcpul_ent tmp; + + if (!pcpul_map[j].ptr) + continue; + if (pcpul_map[i].ptr && + pcpul_map[i].ptr < pcpul_map[j].ptr) + continue; + + tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) + pcpul_nr_lpages--; return ret; enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_fn(pcpul_map[cpu].ptr, pcpul_size); + for (i = 0; i < pcpul_nr_lpages; i++) + if (pcpul_map[i].ptr) + free_fn(pcpul_map[i].ptr, lpage_size); free_bootmem(__pa(pcpul_map), map_size); return -ENOMEM; } @@ -1739,10 +1950,10 @@ enomem: */ void *pcpu_lpage_remapped(void *kaddr) { - unsigned long unit_mask = pcpul_unit_size - 1; - void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask); - unsigned long offset = (unsigned long)kaddr & unit_mask; - int left = 0, right = num_possible_cpus() - 1; + unsigned long lpage_mask = pcpul_lpage_size - 1; + void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask); + unsigned long offset = (unsigned long)kaddr & lpage_mask; + int left = 0, right = pcpul_nr_lpages - 1; int pos; /* pcpul in use at all? 
*/ @@ -1757,13 +1968,8 @@ void *pcpu_lpage_remapped(void *kaddr) left = pos + 1; else if (pcpul_map[pos].ptr > lpage_addr) right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * pcpul_unit_size + offset; - } + else + return pcpul_map[pos].map_addr + offset; } return NULL; -- cgit v1.2.3 From 7a6d3c8b3049d07123628f2bf57127bba2cc878f Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Wed, 1 Jul 2009 17:28:41 -0700 Subject: fuse: make the number of max background requests and congestion threshold tunable The practical values for these limits depend on the design of the filesystem server so let userspace set them at initialization time. Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 10 +++++----- fs/fuse/fuse_i.h | 12 ++++++------ fs/fuse/inode.c | 14 ++++++++++++++ include/linux/fuse.h | 9 +++++++-- 4 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f58ecbc416c8..b152761c1bf6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) static void flush_bg_queue(struct fuse_conn *fc) { - while (fc->active_background < FUSE_MAX_BACKGROUND && + while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; @@ -280,11 +280,11 @@ __releases(&fc->lock) list_del(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { - if (fc->num_background == FUSE_MAX_BACKGROUND) { + if (fc->num_background == fc->max_background) { fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, WRITE); @@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, { req->background = 1; fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) + if (fc->num_background == fc->max_background) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->bdi_initialized) { set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, WRITE); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 52b641fc0faf..6bcfab04396f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -25,12 +25,6 @@ /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 -/** Maximum number of outstanding background requests */ -#define FUSE_MAX_BACKGROUND 12 - -/** Congestion starts at 75% of maximum */ -#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) - /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN @@ -349,6 +343,12 @@ struct fuse_conn { /** rbtree of fuse_files waiting for poll events indexed by ph */ struct rb_root polled_files; + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of background requests at which congestion starts */ + unsigned congestion_threshold; + /** Number of requests currently in the background */ unsigned num_background; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f91ccc4a189d..9aa6f46d0c32 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -32,6 +32,12 @@ DEFINE_MUTEX(fuse_mutex); #define 
FUSE_DEFAULT_BLKSIZE 512 +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + +/** Congestion starts at 75% of maximum */ +#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) + struct fuse_mount_data { int fd; unsigned rootmode; @@ -517,6 +523,8 @@ void fuse_conn_init(struct fuse_conn *fc) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); + fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; + fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; fc->reqctr = 0; @@ -736,6 +744,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else { unsigned long ra_pages; + if (arg->minor >= 13) { + if (arg->max_background) + fc->max_background = arg->max_background; + if (arg->congestion_threshold) + fc->congestion_threshold = arg->congestion_threshold; + } if (arg->minor >= 6) { ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) diff --git a/include/linux/fuse.h b/include/linux/fuse.h index cf593bf9fd32..b3700f0ac268 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -30,6 +30,10 @@ * - add umask flag to input argument of open, mknod and mkdir * - add notification messages for invalidation of inodes and * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables */ #ifndef _LINUX_FUSE_H @@ -41,7 +45,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 12 +#define FUSE_KERNEL_MINOR_VERSION 13 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -427,7 +431,8 @@ struct fuse_init_out { __u32 minor; __u32 max_readahead; __u32 flags; - __u32 unused; + __u16 max_background; + __u16 congestion_threshold; __u32 max_write; }; -- cgit v1.2.3 From 37d217f029a56a6d385f99773fb27dfcb51f9a46 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 8 Jul 2009 18:17:58 +0200 Subject: fuse: document protocol version negotiation Clarify how the protocol version should be negotiated between kernel and userspace. Notably libfuse didn't correctly handle the case when the supported major versions didn't match. Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index b3700f0ac268..3e2925a34bf0 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -41,6 +41,26 @@ #include +/* + * Version negotiation: + * + * Both the kernel and userspace send the version they support in the + * INIT request and reply respectively. + * + * If the major versions match then both shall use the smallest + * of the two minor versions for communication. + * + * If the kernel supports a larger major version, then userspace shall + * reply with the major version it supports, ignore the rest of the + * INIT message and expect a new INIT message from the kernel with a + * matching major version. + * + * If the library supports a larger major version, then it shall fall + * back to the major protocol version sent by the kernel for + * communication and reply with that major version (and an arbitrary + * supported minor version). 
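+ *
+ * For example, a 7.13 kernel and a 7.12 library both end up
+ * speaking minor version 12.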
+ */ + /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 -- cgit v1.2.3 From 023bf6f1b8bf58dc4da7f0dc1cf4787b0d5297c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2009 11:27:40 +0900 Subject: linker script: unify usage of discard definition Discarded sections in different archs share some commonality but have considerable differences. This led to linker script for each arch implementing its own /DISCARD/ definition, which makes maintaining tedious and adding new entries error-prone. This patch makes all linker scripts to move discard definitions to the end of the linker script and use the common DISCARDS macro. As ld uses the first matching section definition, archs can include default discarded sections by including them earlier in the linker script. ia64 is notable because it first throws away some ia64 specific subsections and then include the rest of the sections into the final image, so those sections must be discarded before the inclusion. defconfig compile tested for x86, x86-64, powerpc, powerpc64, ia64, alpha, sparc, sparc64 and s390. Michal Simek tested microblaze. Signed-off-by: Tejun Heo Acked-by: Paul Mundt Acked-by: Mike Frysinger Tested-by: Michal Simek Cc: linux-arch@vger.kernel.org Cc: Michal Simek Cc: microblaze-uclinux@itee.uq.edu.au Cc: Sam Ravnborg Cc: Tony Luck --- arch/alpha/kernel/vmlinux.lds.S | 10 ++-------- arch/avr32/kernel/vmlinux.lds.S | 10 +++------- arch/blackfin/kernel/vmlinux.lds.S | 6 +----- arch/cris/kernel/vmlinux.lds.S | 10 ++-------- arch/frv/kernel/vmlinux.lds.S | 2 +- arch/h8300/kernel/vmlinux.lds.S | 6 ++---- arch/ia64/kernel/vmlinux.lds.S | 17 ++++++++--------- arch/m32r/kernel/vmlinux.lds.S | 11 +++-------- arch/m68k/kernel/vmlinux-std.lds | 11 +++-------- arch/m68k/kernel/vmlinux-sun3.lds | 10 ++-------- arch/m68knommu/kernel/vmlinux.lds.S | 8 +------- arch/microblaze/kernel/vmlinux.lds.S | 2 +- arch/mips/kernel/vmlinux.lds.S | 22 ++++++++++------------ arch/mn10300/kernel/vmlinux.lds.S | 9 +++------ arch/parisc/kernel/vmlinux.lds.S | 9 ++++----- arch/powerpc/kernel/vmlinux.lds.S | 10 +++------- arch/s390/kernel/vmlinux.lds.S | 10 +++------- arch/sh/kernel/vmlinux.lds.S | 11 ++++------- arch/sparc/kernel/vmlinux.lds.S | 9 ++------- arch/um/include/asm/common.lds.S | 5 ----- arch/um/kernel/dyn.lds.S | 2 +- arch/um/kernel/uml.lds.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 11 ++++------- arch/xtensa/kernel/vmlinux.lds.S | 14 ++++---------- include/asm-generic/vmlinux.lds.h | 18 ++++++++++++------ 25 files changed, 80 insertions(+), 155 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 75fe1d6877e9..6dc03c35caa0 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -134,14 +134,6 @@ SECTIONS __bss_stop = .; _end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .mdebug 0 : { *(.mdebug) } @@ -151,4 +143,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index b8324608ec0c..c4b56654349a 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -124,15 +124,11 @@ SECTIONS _end = .; } + DWARF_DEBUG + /* When something in the kernel is NOT compiled as a module, the module * cleanup code and data are put into these segments. Both can then be * thrown away, as cleanup code is never called unless it's a module. 
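 *
 * (The DISCARDS macro used here expands to a /DISCARD/ output
 * section covering EXIT_TEXT, EXIT_DATA, EXIT_CALL and
 * *(.discard); see the include/asm-generic/vmlinux.lds.h hunk
 * at the end of this patch.)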
*/ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - - DWARF_DEBUG + DISCARDS } diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 6e8eabd8f0a6..d7ffe299b979 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -277,9 +277,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : - { - *(.exitcall.exit) - *(.discard) - } + DISCARDS } diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index a3175ebb38cc..6c81836b9229 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -140,13 +140,7 @@ SECTIONS _end = .; __end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; + + DISCARDS } diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 64b5a5e4d35e..7dbf41f68b52 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -178,7 +178,7 @@ SECTIONS .comment 0 : { *(.comment) } - /DISCARD/ : { *(.discard) } + DISCARDS } __kernel_image_size_no_bss = __bss_start - __kernel_image_start; diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 03d6c0df33db..662b02ecb86e 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -152,10 +152,6 @@ SECTIONS __end = . ; __ramstart = .; } - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } .romfs : { *(.romfs*) @@ -166,4 +162,6 @@ SECTIONS COMMAND_START = . - 0x200 ; __ramend = . ; } + + DISCARDS } diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 13d958975874..eb4214d1c5af 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -24,15 +24,14 @@ PHDRS { } SECTIONS { - /* Sections to be discarded */ + /* unwind exit sections must be discarded before the rest of the + sections get included. */ /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) - } + *(.comment) + *(.note) + } v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ phys_start = _start - LOAD_OFFSET; @@ -317,7 +316,7 @@ SECTIONS .debug_funcnames 0 : { *(.debug_funcnames) } .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . */ - /DISCARD/ : { *(.comment) } - /DISCARD/ : { *(.note) } + + /* Default discards */ + DISCARDS } diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 480a49944cfd..de5e21cca6a5 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -120,14 +120,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -136,4 +128,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 905a797ada93..47eac19e8f61 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -82,14 +82,6 @@ SECTIONS _end = . 
; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -98,4 +90,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 47d04be322aa..03efaf04d7d7 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -77,14 +77,6 @@ __init_begin = .; _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .crap : { /* Stabs debugging sections. */ *(.stab) @@ -97,4 +89,6 @@ __init_begin = .; *(.note) } + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index 68111a61a77f..2736a5e309c0 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -184,13 +184,6 @@ SECTIONS { __init_end = .; } > INIT - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .bss : { . = ALIGN(4); _sbss = . ; @@ -201,5 +194,6 @@ SECTIONS { _end = . ; } > BSS + DISCARDS } diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index 81bebdcb18fe..ec5fa91a48d8 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -163,5 +163,5 @@ SECTIONS { . = ALIGN(4096); _end = .; - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 45901609b741..1474c18fb777 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -176,18 +176,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - - /* ABI crap starts here */ - *(.MIPS.options) - *(.options) - *(.pdr) - *(.reginfo) - } - /* These mark the ABI of the kernel for debuggers. */ .mdebug.abi32 : { KEEP(*(.mdebug.abi32)) @@ -213,4 +201,14 @@ SECTIONS *(.gptab.bss) *(.gptab.sbss) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { + /* ABI crap starts here */ + *(.MIPS.options) + *(.options) + *(.pdr) + *(.reginfo) + } } diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 5609d4962a55..8fcd0f1e21de 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -115,13 +115,10 @@ SECTIONS . = ALIGN(PAGE_SIZE); pg0 = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_CALL - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index ccf58341845a..aea1784edbd1 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -237,10 +237,12 @@ SECTIONS /* freed after init ends here */ _end = . 
; + STABS_DEBUG + .note 0 : { *(.note) } + /* Sections to be discarded */ + DISCARDS /DISCARD/ : { - *(.exitcall.exit) - *(.discard) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries @@ -253,7 +255,4 @@ SECTIONS *(.gnu.hash) #endif } - - STABS_DEBUG - .note 0 : { *(.note) } } diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7fca9355fd3d..244e3658983c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -37,13 +37,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - EXIT_DATA - } - . = KERNELBASE; /* @@ -299,4 +292,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; PROVIDE32 (end = .); + + /* Sections to be discarded. */ + DISCARDS } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 98867dfea469..82415c75b996 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -157,14 +157,10 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 766976d27b21..0ce254bca92f 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -163,17 +163,14 @@ SECTIONS _end = . ; } + STABS_DEBUG + DWARF_DEBUG + /* * When something in the kernel is NOT compiled as a module, the * module cleanup code and data are put into these segments. Both * can then be thrown away, as cleanup code is never called unless * it's a module. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } - - STABS_DEBUG - DWARF_DEBUG + DISCARDS } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index d63cf914667d..866390feb683 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -171,13 +171,8 @@ SECTIONS } _end = . 
; - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index cb0248616d49..37ecc5577a9a 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -123,8 +123,3 @@ __initramfs_end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - } - diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 2916d6eadffd..715a188c0472 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -157,5 +157,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 1f8a622cabe1..2ebd39765db8 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -101,5 +101,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 367e87882041..b600c843710b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -387,15 +387,12 @@ SECTIONS _end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.eh_frame) } } diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index b1e24638acd7..921b6ff3b645 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -280,16 +280,6 @@ SECTIONS *(.ResetVector.text) } - /* Sections to be discarded */ - /DISCARD/ : - { - *(.exit.literal) - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .xt.lit : { *(.xt.lit) } .xt.prop : { *(.xt.prop) } @@ -322,4 +312,8 @@ SECTIONS *(.xt.lit) *(.gnu.linkonce.p*) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.exit.literal) } } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c5c18ac878ab..ab8ea9b7741e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -35,13 +35,10 @@ * __bss_stop = .; * _end = .; * - * /DISCARD/ : { - * EXIT_TEXT - * EXIT_DATA - * EXIT_CALL - * } * STABS_DEBUG * DWARF_DEBUG + * + * DISCARDS // must be the last * } * * [__init_begin, __init_end] is the init section that may be freed after init @@ -629,11 +626,20 @@ #define INIT_RAM_FS #endif +/* + * Default discarded sections. + * + * Some archs want to discard exit text/data at runtime rather than + * link time due to cross-section references such as alt instructions, + * bug table, eh_frame, etc. DISCARDS must be the last of output + * section definitions so that such archs put those in earlier section + * definitions. + */ #define DISCARDS \ /DISCARD/ : { \ EXIT_TEXT \ EXIT_DATA \ - *(.exitcall.exit) \ + EXIT_CALL \ *(.discard) \ } -- cgit v1.2.3 From 5a421ce3c062a87db0a9e7f2a0a7ee0a5b869aab Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 10 Jul 2009 12:37:40 +0300 Subject: nfsd41: gather and report statistics also for v4.1 ops Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields --- include/linux/nfs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index bd2eba530667..aff924a24abb 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -234,7 +234,7 @@ enum nfs_opnum4 { Needs to be updated if more operations are defined in future.*/ #define FIRST_NFS4_OP OP_ACCESS -#define LAST_NFS4_OP OP_RELEASE_LOCKOWNER +#define LAST_NFS4_OP OP_RECLAIM_COMPLETE enum nfsstat4 { NFS4_OK = 0, -- cgit v1.2.3 From 4bd9b0f4afc76cf972578c702e1bc1b6f2d10ba5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 24 Jun 2009 15:37:45 -0400 Subject: nfsd41: use globals for DRC limits The version 4.1 DRC memory limit and tracking variables are server wide and session specific. Replace struct svc_serv fields with globals. Stop using the svc_serv sv_lock. Add a spinlock to serialize access to the DRC limit management variables which change on session creation and deletion (usage counter) or (future) administrative action to adjust the total DRC memory limit. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy --- fs/nfsd/nfs4state.c | 10 +++++----- fs/nfsd/nfssvc.c | 19 +++++++++++++++---- include/linux/nfsd/nfsd.h | 3 +++ include/linux/sunrpc/svc.h | 2 -- 4 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 980a216a48c8..2e6a44e3d2fe 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -430,11 +430,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - spin_lock(&nfsd_serv->sv_lock); - if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) - np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; - nfsd_serv->sv_drc_pages_used += np; - spin_unlock(&nfsd_serv->sv_lock); + spin_lock(&nfsd_drc_lock); + if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) + np = nfsd_drc_max_pages - nfsd_drc_pages_used; + nfsd_drc_pages_used += np; + spin_unlock(&nfsd_drc_lock); if (np <= 0) { status = nfserr_resource; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54b..78d8fcd883fb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -67,6 +67,16 @@ struct timeval nfssvc_boot; DEFINE_MUTEX(nfsd_mutex); struct svc_serv *nfsd_serv; +/* + * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. + * nfsd_drc_max_pages limits the total amount of memory available for + * version 4.1 DRC caches. + * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
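+ *
+ * set_max_drc() below initializes nfsd_drc_max_pages to
+ * nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT, i.e. 1/128th
+ * of the free buffer pages.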
+ */ +spinlock_t nfsd_drc_lock; +unsigned int nfsd_drc_max_pages; +unsigned int nfsd_drc_pages_used; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static struct svc_version * nfsd_acl_version[] = { @@ -238,11 +248,12 @@ static void set_max_drc(void) { /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ #define NFSD_DRC_SIZE_SHIFT 7 - nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + nfsd_drc_max_pages = nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT; - nfsd_serv->sv_drc_pages_used = 0; - dprintk("%s svc_drc_max_pages %u\n", __func__, - nfsd_serv->sv_drc_max_pages); + nfsd_drc_pages_used = 0; + spin_lock_init(&nfsd_drc_lock); + dprintk("%s nfsd_drc_max_pages %u\n", __func__, + nfsd_drc_max_pages); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2b49d676d0c9..2571f856908f 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -56,6 +56,9 @@ extern struct svc_version nfsd_version2, nfsd_version3, extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; +extern spinlock_t nfsd_drc_lock; +extern unsigned int nfsd_drc_max_pages; +extern unsigned int nfsd_drc_pages_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ea8009695c69..52e8cb0a7569 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -94,8 +94,6 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ - unsigned int sv_drc_max_pages; /* Total pages for DRC */ - unsigned int sv_drc_pages_used;/* DRC pages used */ #if defined(CONFIG_NFS_V4_1) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same -- cgit v1.2.3 From d782c3f95c9263dc0b98e7115f75f1e18b9600b3 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 22 Jun 2009 13:17:08 +0800 Subject: drm/mode: add the CVT algorithm in kernel space Add the CVT algorithm in kernel space. And this function can be called to generate the required modeline. I copied it from the file of xserver/hw/xfree86/modes/xf86cvt.c. What I have done is to translate it by using integer calculation. This is to avoid the float-point calculation in kernel space. 
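A minimal usage sketch, assuming a valid struct drm_device *dev (the
resolution and refresh rate are chosen arbitrarily for illustration):

	struct drm_display_mode *mode;

	/* 1024x768 at 60Hz, normal blanking, progressive scan */
	mode = drm_cvt_mode(dev, 1024, 768, 60, false, false);
	if (mode)
		drm_mode_debug_printmodeline(mode);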
[airlied:- cleaned up some bits] Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 219 ++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 3 + 2 files changed, 222 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 54f492a488a9..0dbc7e4f8643 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -8,6 +8,7 @@ * Copyright © 2007 Dave Airlie * Copyright © 2007-2008 Intel Corporation * Jesse Barnes + * Copyright 2005-2006 Luc Verhaegen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -61,6 +62,224 @@ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) } EXPORT_SYMBOL(drm_mode_debug_printmodeline); +/** + * drm_cvt_mode -create a modeline based on CVT algorithm + * @dev: DRM device + * @hdisplay: hdisplay size + * @vdisplay: vdisplay size + * @vrefresh : vrefresh rate + * @reduced : Whether the GTF calculation is simplified + * @interlaced:Whether the interlace is supported + * + * LOCKING: + * none. + * + * return the modeline based on CVT algorithm + * + * This function is called to generate the modeline based on CVT algorithm + * according to the hdisplay, vdisplay, vrefresh. + * It is based from the VESA(TM) Coordinated Video Timing Generator by + * Graham Loveridge April 9, 2003 available at + * http://www.vesa.org/public/CVT/CVTd6r1.xls + * + * And it is copied from xf86CVTmode in xserver/hw/xfree86/modes/xf86cvt.c. + * What I have done is to translate it by using integer calculation. + */ +#define HV_FACTOR 1000 +struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, + int vdisplay, int vrefresh, + bool reduced, bool interlaced) +{ + /* 1) top/bottom margin size (% of height) - default: 1.8, */ +#define CVT_MARGIN_PERCENTAGE 18 + /* 2) character cell horizontal granularity (pixels) - default 8 */ +#define CVT_H_GRANULARITY 8 + /* 3) Minimum vertical porch (lines) - default 3 */ +#define CVT_MIN_V_PORCH 3 + /* 4) Minimum number of vertical back porch lines - default 6 */ +#define CVT_MIN_V_BPORCH 6 + /* Pixel Clock step (kHz) */ +#define CVT_CLOCK_STEP 250 + struct drm_display_mode *drm_mode; + bool margins = false; + unsigned int vfieldrate, hperiod; + int hdisplay_rnd, hmargin, vdisplay_rnd, vmargin, vsync; + int interlace; + + /* allocate the drm_display_mode structure. 
If failure, we will + * return directly + */ + drm_mode = drm_mode_create(dev); + if (!drm_mode) + return NULL; + + /* the CVT default refresh rate is 60Hz */ + if (!vrefresh) + vrefresh = 60; + + /* the required field fresh rate */ + if (interlaced) + vfieldrate = vrefresh * 2; + else + vfieldrate = vrefresh; + + /* horizontal pixels */ + hdisplay_rnd = hdisplay - (hdisplay % CVT_H_GRANULARITY); + + /* determine the left&right borders */ + hmargin = 0; + if (margins) { + hmargin = hdisplay_rnd * CVT_MARGIN_PERCENTAGE / 1000; + hmargin -= hmargin % CVT_H_GRANULARITY; + } + /* find the total active pixels */ + drm_mode->hdisplay = hdisplay_rnd + 2 * hmargin; + + /* find the number of lines per field */ + if (interlaced) + vdisplay_rnd = vdisplay / 2; + else + vdisplay_rnd = vdisplay; + + /* find the top & bottom borders */ + vmargin = 0; + if (margins) + vmargin = vdisplay_rnd * CVT_MARGIN_PERCENTAGE / 1000; + + drm_mode->vdisplay = vdisplay_rnd + 2 * vmargin; + + /* Interlaced */ + if (interlaced) + interlace = 1; + else + interlace = 0; + + /* Determine VSync Width from aspect ratio */ + if (!(vdisplay % 3) && ((vdisplay * 4 / 3) == hdisplay)) + vsync = 4; + else if (!(vdisplay % 9) && ((vdisplay * 16 / 9) == hdisplay)) + vsync = 5; + else if (!(vdisplay % 10) && ((vdisplay * 16 / 10) == hdisplay)) + vsync = 6; + else if (!(vdisplay % 4) && ((vdisplay * 5 / 4) == hdisplay)) + vsync = 7; + else if (!(vdisplay % 9) && ((vdisplay * 15 / 9) == hdisplay)) + vsync = 7; + else /* custom */ + vsync = 10; + + if (!reduced) { + /* simplify the GTF calculation */ + /* 4) Minimum time of vertical sync + back porch interval (µs) + * default 550.0 + */ + int tmp1, tmp2; +#define CVT_MIN_VSYNC_BP 550 + /* 3) Nominal HSync width (% of line period) - default 8 */ +#define CVT_HSYNC_PERCENTAGE 8 + unsigned int hblank_percentage; + int vsyncandback_porch, vback_porch, hblank; + + /* estimated the horizontal period */ + tmp1 = HV_FACTOR * 1000000 - + CVT_MIN_VSYNC_BP * HV_FACTOR * vfieldrate; + tmp2 = (vdisplay_rnd + 2 * vmargin + CVT_MIN_V_PORCH) * 2 + + interlace; + hperiod = tmp1 * 2 / (tmp2 * vfieldrate); + + tmp1 = CVT_MIN_VSYNC_BP * HV_FACTOR / hperiod + 1; + /* 9. Find number of lines in sync + backporch */ + if (tmp1 < (vsync + CVT_MIN_V_PORCH)) + vsyncandback_porch = vsync + CVT_MIN_V_PORCH; + else + vsyncandback_porch = tmp1; + /* 10. Find number of lines in back porch */ + vback_porch = vsyncandback_porch - vsync; + drm_mode->vtotal = vdisplay_rnd + 2 * vmargin + + vsyncandback_porch + CVT_MIN_V_PORCH; + /* 5) Definition of Horizontal blanking time limitation */ + /* Gradient (%/kHz) - default 600 */ +#define CVT_M_FACTOR 600 + /* Offset (%) - default 40 */ +#define CVT_C_FACTOR 40 + /* Blanking time scaling factor - default 128 */ +#define CVT_K_FACTOR 128 + /* Scaling factor weighting - default 20 */ +#define CVT_J_FACTOR 20 +#define CVT_M_PRIME (CVT_M_FACTOR * CVT_K_FACTOR / 256) +#define CVT_C_PRIME ((CVT_C_FACTOR - CVT_J_FACTOR) * CVT_K_FACTOR / 256 + \ + CVT_J_FACTOR) + /* 12. Find ideal blanking duty cycle from formula */ + hblank_percentage = CVT_C_PRIME * HV_FACTOR - CVT_M_PRIME * + hperiod / 1000; + /* 13. Blanking time */ + if (hblank_percentage < 20 * HV_FACTOR) + hblank_percentage = 20 * HV_FACTOR; + hblank = drm_mode->hdisplay * hblank_percentage / + (100 * HV_FACTOR - hblank_percentage); + hblank -= hblank % (2 * CVT_H_GRANULARITY); + /* 14. 
find the total pixes per line */ + drm_mode->htotal = drm_mode->hdisplay + hblank; + drm_mode->hsync_end = drm_mode->hdisplay + hblank / 2; + drm_mode->hsync_start = drm_mode->hsync_end - + (drm_mode->htotal * CVT_HSYNC_PERCENTAGE) / 100; + drm_mode->hsync_start += CVT_H_GRANULARITY - + drm_mode->hsync_start % CVT_H_GRANULARITY; + /* fill the Vsync values */ + drm_mode->vsync_start = drm_mode->vdisplay + CVT_MIN_V_PORCH; + drm_mode->vsync_end = drm_mode->vsync_start + vsync; + } else { + /* Reduced blanking */ + /* Minimum vertical blanking interval time (µs)- default 460 */ +#define CVT_RB_MIN_VBLANK 460 + /* Fixed number of clocks for horizontal sync */ +#define CVT_RB_H_SYNC 32 + /* Fixed number of clocks for horizontal blanking */ +#define CVT_RB_H_BLANK 160 + /* Fixed number of lines for vertical front porch - default 3*/ +#define CVT_RB_VFPORCH 3 + int vbilines; + int tmp1, tmp2; + /* 8. Estimate Horizontal period. */ + tmp1 = HV_FACTOR * 1000000 - + CVT_RB_MIN_VBLANK * HV_FACTOR * vfieldrate; + tmp2 = vdisplay_rnd + 2 * vmargin; + hperiod = tmp1 / (tmp2 * vfieldrate); + /* 9. Find number of lines in vertical blanking */ + vbilines = CVT_RB_MIN_VBLANK * HV_FACTOR / hperiod + 1; + /* 10. Check if vertical blanking is sufficient */ + if (vbilines < (CVT_RB_VFPORCH + vsync + CVT_MIN_V_BPORCH)) + vbilines = CVT_RB_VFPORCH + vsync + CVT_MIN_V_BPORCH; + /* 11. Find total number of lines in vertical field */ + drm_mode->vtotal = vdisplay_rnd + 2 * vmargin + vbilines; + /* 12. Find total number of pixels in a line */ + drm_mode->htotal = drm_mode->hdisplay + CVT_RB_H_BLANK; + /* Fill in HSync values */ + drm_mode->hsync_end = drm_mode->hdisplay + CVT_RB_H_BLANK / 2; + drm_mode->hsync_start = drm_mode->hsync_end = CVT_RB_H_SYNC; + } + /* 15/13. Find pixel clock frequency (kHz for xf86) */ + drm_mode->clock = drm_mode->htotal * HV_FACTOR * 1000 / hperiod; + drm_mode->clock -= drm_mode->clock % CVT_CLOCK_STEP; + /* 18/16. Find actual vertical frame frequency */ + /* ignore - just set the mode flag for interlaced */ + if (interlaced) + drm_mode->vtotal *= 2; + /* Fill the mode line name */ + drm_mode_set_name(drm_mode); + if (reduced) + drm_mode->flags |= (DRM_MODE_FLAG_PHSYNC | + DRM_MODE_FLAG_NVSYNC); + else + drm_mode->flags |= (DRM_MODE_FLAG_PVSYNC | + DRM_MODE_FLAG_NHSYNC); + if (interlaced) + drm_mode->flags |= DRM_MODE_FLAG_INTERLACE; + + return drm_mode; +} +EXPORT_SYMBOL(drm_cvt_mode); + /** * drm_mode_set_name - set the name on a mode * @mode: name will be set in this mode diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 7300fb866767..820bc0977e5e 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -736,4 +736,7 @@ extern int drm_mode_gamma_get_ioctl(struct drm_device *dev, extern int drm_mode_gamma_set_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern bool drm_detect_hdmi_monitor(struct edid *edid); +extern struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, + int hdisplay, int vdisplay, int vrefresh, + bool reduced, bool interlaced); #endif /* __DRM_CRTC_H__ */ -- cgit v1.2.3 From 26bbdadad356ec02d33657858d91675f3e9aca94 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 22 Jun 2009 13:17:09 +0800 Subject: drm/mode: add the GTF algorithm in kernel space Add the GTF algorithm in kernel space. And this function can be called to generate the required modeline. I copied it from the file of xserver/hw/xfree86/modes/xf86gtf.c. What I have done is to translate it by using integer calculation. 
This is to avoid the float-point calculation in kernel space. At the same tie I also refer to the function of fb_get_mode in drivers/video/fbmon.c Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 197 ++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 3 + 2 files changed, 200 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 0dbc7e4f8643..fd489d76fbbc 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -9,6 +9,7 @@ * Copyright © 2007-2008 Intel Corporation * Jesse Barnes * Copyright 2005-2006 Luc Verhaegen + * Copyright (c) 2001, Andy Ritger aritger@nvidia.com * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -280,6 +281,202 @@ struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, } EXPORT_SYMBOL(drm_cvt_mode); +/** + * drm_gtf_mode - create the modeline based on GTF algorithm + * + * @dev :drm device + * @hdisplay :hdisplay size + * @vdisplay :vdisplay size + * @vrefresh :vrefresh rate. + * @interlaced :whether the interlace is supported + * @margins :whether the margin is supported + * + * LOCKING. + * none. + * + * return the modeline based on GTF algorithm + * + * This function is to create the modeline based on the GTF algorithm. + * Generalized Timing Formula is derived from: + * GTF Spreadsheet by Andy Morrish (1/5/97) + * available at http://www.vesa.org + * + * And it is copied from the file of xserver/hw/xfree86/modes/xf86gtf.c. + * What I have done is to translate it by using integer calculation. + * I also refer to the function of fb_get_mode in the file of + * drivers/video/fbmon.c + */ +struct drm_display_mode *drm_gtf_mode(struct drm_device *dev, int hdisplay, + int vdisplay, int vrefresh, + bool interlaced, int margins) +{ + /* 1) top/bottom margin size (% of height) - default: 1.8, */ +#define GTF_MARGIN_PERCENTAGE 18 + /* 2) character cell horizontal granularity (pixels) - default 8 */ +#define GTF_CELL_GRAN 8 + /* 3) Minimum vertical porch (lines) - default 3 */ +#define GTF_MIN_V_PORCH 1 + /* width of vsync in lines */ +#define V_SYNC_RQD 3 + /* width of hsync as % of total line */ +#define H_SYNC_PERCENT 8 + /* min time of vsync + back porch (microsec) */ +#define MIN_VSYNC_PLUS_BP 550 + /* blanking formula gradient */ +#define GTF_M 600 + /* blanking formula offset */ +#define GTF_C 40 + /* blanking formula scaling factor */ +#define GTF_K 128 + /* blanking formula scaling factor */ +#define GTF_J 20 + /* C' and M' are part of the Blanking Duty Cycle computation */ +#define GTF_C_PRIME (((GTF_C - GTF_J) * GTF_K / 256) + GTF_J) +#define GTF_M_PRIME (GTF_K * GTF_M / 256) + struct drm_display_mode *drm_mode; + unsigned int hdisplay_rnd, vdisplay_rnd, vfieldrate_rqd; + int top_margin, bottom_margin; + int interlace; + unsigned int hfreq_est; + int vsync_plus_bp, vback_porch; + unsigned int vtotal_lines, vfieldrate_est, hperiod; + unsigned int vfield_rate, vframe_rate; + int left_margin, right_margin; + unsigned int total_active_pixels, ideal_duty_cycle; + unsigned int hblank, total_pixels, pixel_freq; + int hsync, hfront_porch, vodd_front_porch_lines; + unsigned int tmp1, tmp2; + + drm_mode = drm_mode_create(dev); + if (!drm_mode) + return NULL; + + /* 1. 
In order to give correct results, the number of horizontal + * pixels requested is first processed to ensure that it is divisible + * by the character size, by rounding it to the nearest character + * cell boundary: + */ + hdisplay_rnd = (hdisplay + GTF_CELL_GRAN / 2) / GTF_CELL_GRAN; + hdisplay_rnd = hdisplay_rnd * GTF_CELL_GRAN; + + /* 2. If interlace is requested, the number of vertical lines assumed + * by the calculation must be halved, as the computation calculates + * the number of vertical lines per field. + */ + if (interlaced) + vdisplay_rnd = vdisplay / 2; + else + vdisplay_rnd = vdisplay; + + /* 3. Find the frame rate required: */ + if (interlaced) + vfieldrate_rqd = vrefresh * 2; + else + vfieldrate_rqd = vrefresh; + + /* 4. Find number of lines in Top margin: */ + top_margin = 0; + if (margins) + top_margin = (vdisplay_rnd * GTF_MARGIN_PERCENTAGE + 500) / + 1000; + /* 5. Find number of lines in bottom margin: */ + bottom_margin = top_margin; + + /* 6. If interlace is required, then set variable interlace: */ + if (interlaced) + interlace = 1; + else + interlace = 0; + + /* 7. Estimate the Horizontal frequency */ + { + tmp1 = (1000000 - MIN_VSYNC_PLUS_BP * vfieldrate_rqd) / 500; + tmp2 = (vdisplay_rnd + 2 * top_margin + GTF_MIN_V_PORCH) * + 2 + interlace; + hfreq_est = (tmp2 * 1000 * vfieldrate_rqd) / tmp1; + } + + /* 8. Find the number of lines in V sync + back porch */ + /* [V SYNC+BP] = RINT(([MIN VSYNC+BP] * hfreq_est / 1000000)) */ + vsync_plus_bp = MIN_VSYNC_PLUS_BP * hfreq_est / 1000; + vsync_plus_bp = (vsync_plus_bp + 500) / 1000; + /* 9. Find the number of lines in V back porch alone: */ + vback_porch = vsync_plus_bp - V_SYNC_RQD; + /* 10. Find the total number of lines in Vertical field period: */ + vtotal_lines = vdisplay_rnd + top_margin + bottom_margin + + vsync_plus_bp + GTF_MIN_V_PORCH; + /* 11. Estimate the Vertical field frequency: */ + vfieldrate_est = hfreq_est / vtotal_lines; + /* 12. Find the actual horizontal period: */ + hperiod = 1000000 / (vfieldrate_rqd * vtotal_lines); + + /* 13. Find the actual Vertical field frequency: */ + vfield_rate = hfreq_est / vtotal_lines; + /* 14. Find the Vertical frame frequency: */ + if (interlaced) + vframe_rate = vfield_rate / 2; + else + vframe_rate = vfield_rate; + /* 15. Find number of pixels in left margin: */ + if (margins) + left_margin = (hdisplay_rnd * GTF_MARGIN_PERCENTAGE + 500) / + 1000; + else + left_margin = 0; + + /* 16.Find number of pixels in right margin: */ + right_margin = left_margin; + /* 17.Find total number of active pixels in image and left and right */ + total_active_pixels = hdisplay_rnd + left_margin + right_margin; + /* 18.Find the ideal blanking duty cycle from blanking duty cycle */ + ideal_duty_cycle = GTF_C_PRIME * 1000 - + (GTF_M_PRIME * 1000000 / hfreq_est); + /* 19.Find the number of pixels in the blanking time to the nearest + * double character cell: */ + hblank = total_active_pixels * ideal_duty_cycle / + (100000 - ideal_duty_cycle); + hblank = (hblank + GTF_CELL_GRAN) / (2 * GTF_CELL_GRAN); + hblank = hblank * 2 * GTF_CELL_GRAN; + /* 20.Find total number of pixels: */ + total_pixels = total_active_pixels + hblank; + /* 21.Find pixel clock frequency: */ + pixel_freq = total_pixels * hfreq_est / 1000; + /* Stage 1 computations are now complete; I should really pass + * the results to another function and do the Stage 2 computations, + * but I only need a few more values so I'll just append the + * computations here for now */ + /* 17. 
Find the number of pixels in the horizontal sync period: */ + hsync = H_SYNC_PERCENT * total_pixels / 100; + hsync = (hsync + GTF_CELL_GRAN / 2) / GTF_CELL_GRAN; + hsync = hsync * GTF_CELL_GRAN; + /* 18. Find the number of pixels in horizontal front porch period */ + hfront_porch = hblank / 2 - hsync; + /* 36. Find the number of lines in the odd front porch period: */ + vodd_front_porch_lines = GTF_MIN_V_PORCH ; + + /* finally, pack the results in the mode struct */ + drm_mode->hdisplay = hdisplay_rnd; + drm_mode->hsync_start = hdisplay_rnd + hfront_porch; + drm_mode->hsync_end = drm_mode->hsync_start + hsync; + drm_mode->htotal = total_pixels; + drm_mode->vdisplay = vdisplay_rnd; + drm_mode->vsync_start = vdisplay_rnd + vodd_front_porch_lines; + drm_mode->vsync_end = drm_mode->vsync_start + V_SYNC_RQD; + drm_mode->vtotal = vtotal_lines; + + drm_mode->clock = pixel_freq; + + drm_mode_set_name(drm_mode); + drm_mode->flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC; + + if (interlaced) { + drm_mode->vtotal *= 2; + drm_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } + + return drm_mode; +} +EXPORT_SYMBOL(drm_gtf_mode); /** * drm_mode_set_name - set the name on a mode * @mode: name will be set in this mode diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 820bc0977e5e..125994d8ac0b 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -739,4 +739,7 @@ extern bool drm_detect_hdmi_monitor(struct edid *edid); extern struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, int vdisplay, int vrefresh, bool reduced, bool interlaced); +extern struct drm_display_mode *drm_gtf_mode(struct drm_device *dev, + int hdisplay, int vdisplay, int vrefresh, + bool interlaced, int margins); #endif /* __DRM_CRTC_H__ */ -- cgit v1.2.3 From 3d39cecc4841e8d4c4abdb401d10180f5faaded0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 8 Jul 2009 15:23:30 +0100 Subject: intel-iommu: Remove superfluous iova_alloc_lock from IOVA code We only ever obtain this lock immediately before the iova_rbtree_lock, and release it immediately after the iova_rbtree_lock. So ditch it and just use iova_rbtree_lock. 
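In sketch form, the locking collapses from a nested pair into a single
irqsave acquisition (distilled from the hunks below):

	/* before */
	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
	spin_lock(&iovad->iova_rbtree_lock);
	/* ... walk/update the iova rbtree ... */
	spin_unlock(&iovad->iova_rbtree_lock);
	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);

	/* after */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	/* ... walk/update the iova rbtree ... */
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);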
[v2: Remove the lockdep bits this time too] Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 --- drivers/pci/iova.c | 16 ++++------------ include/linux/iova.h | 1 - 3 files changed, 4 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c5f7c73cbb55..d6a857397ec3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1309,7 +1309,6 @@ static void iommu_detach_domain(struct dmar_domain *domain, } static struct iova_domain reserved_iova_list; -static struct lock_class_key reserved_alloc_key; static struct lock_class_key reserved_rbtree_key; static void dmar_init_reserved_ranges(void) @@ -1320,8 +1319,6 @@ static void dmar_init_reserved_ranges(void) init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); - lockdep_set_class(&reserved_iova_list.iova_alloc_lock, - &reserved_alloc_key); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 46dd440e2315..7914951ef29a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -22,7 +22,6 @@ void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) { - spin_lock_init(&iovad->iova_alloc_lock); spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; @@ -205,7 +204,6 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned) { - unsigned long flags; struct iova *new_iova; int ret; @@ -219,11 +217,9 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (size_aligned) size = __roundup_pow_of_two(size); - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, new_iova, size_aligned); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); if (ret) { free_iova_mem(new_iova); return NULL; @@ -381,8 +377,7 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); - spin_lock(&iovad->iova_rbtree_lock); + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { iova = container_of(node, struct iova, node); @@ -402,8 +397,7 @@ reserve_iova(struct iova_domain *iovad, iova = __insert_new_range(iovad, pfn_lo, pfn_hi); finish: - spin_unlock(&iovad->iova_rbtree_lock); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return iova; } @@ -420,8 +414,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) unsigned long flags; struct rb_node *node; - spin_lock_irqsave(&from->iova_alloc_lock, flags); - spin_lock(&from->iova_rbtree_lock); + spin_lock_irqsave(&from->iova_rbtree_lock, flags); for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { struct iova *iova = container_of(node, struct iova, node); struct iova *new_iova; @@ -430,6 +423,5 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", iova->pfn_lo, iova->pfn_lo); } - spin_unlock(&from->iova_rbtree_lock); - spin_unlock_irqrestore(&from->iova_alloc_lock, flags); + spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } diff --git a/include/linux/iova.h b/include/linux/iova.h index 228f6c94b69c..76a0759e88ec 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,7 +28,6 @@ struct iova { 
/* holds all the iova translations for a domain */
 struct iova_domain {
-	spinlock_t	iova_alloc_lock;/* Lock to protect iova allocation */
 	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
 	struct rb_root	rbroot;		/* iova domain rbtree root */
 	struct rb_node	*cached32_node; /* Save last alloced node */
-- cgit v1.2.3

From a76761b621bcd8336065c4fe3a74f046858bc34c Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 15 Jul 2009 23:35:14 +0900
Subject: percpu: add dummy pcpu_lpage_remapped() for !CONFIG_SMP

!CONFIG_SMP was missing a pcpu_lpage_remapped() definition, causing a
build failure. Add a dummy implementation. This was discovered by
linux-next testing.

Signed-off-by: Tejun Heo
Cc: Randy Dunlap
Cc: Kamalesh Babulal
Cc: Stephen Rothwell
---
 include/linux/percpu.h | 5 +++++
 1 file changed, 5 insertions(+)
(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 8ce91af4aa19..e134c8229631 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -184,6 +184,11 @@ static inline void free_percpu(void *p)
 static inline void __init setup_per_cpu_areas(void) { }
 
+static inline void *pcpu_lpage_remapped(void *kaddr)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_SMP */
 
 #define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type), \
-- cgit v1.2.3

From 719a72b7c75bb239ca6184190ab994b71a31c6dc Mon Sep 17 00:00:00 2001
From: Magnus Damm
Date: Fri, 17 Jul 2009 14:59:55 +0000
Subject: usb: r8a66597-hcd platform data on_chip support

Convert the r8a66597-hcd driver to use the on_chip flag from platform
data to enable on-chip behaviour instead of relying on
CONFIG_SUPERH_ON_CHIP_R8A66597 ugliness.

This makes the code cleaner and also allows us to support both external
and internal r8a66597 with the same kernel.

It also makes the Kconfig part more future-proof, since with this patch
we can add support for new processors with an on-chip r8a66597 without
modifying the Kconfig.
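[Editor's note: for illustration only -- the sketch below is not part of the patch. It shows the board-file pattern the on_chip flag enables. The r8a66597_platdata fields come from the patch; the platform device name, id and resource details are assumed.]

#include <linux/platform_device.h>
#include <linux/usb/r8a66597.h>

/* On-chip controller (e.g. SH7724): only the on_chip flag is set;
 * the USB clock is handled through the SoC clock framework. */
static struct r8a66597_platdata onchip_usb_host_data = {
	.on_chip = 1,
};

/* External controller: on_chip stays zero and the crystal rate is
 * described instead (assuming a 24MHz part here). */
static struct r8a66597_platdata external_usb_host_data = {
	.xtal = R8A66597_PLATDATA_XTAL_24MHZ,
};

static struct platform_device usb_host_device = {
	.name = "r8a66597_hcd",	/* driver name, assumed */
	.id = 0,
	.dev = {
		.platform_data = &onchip_usb_host_data,
	},
	/* MMIO window and IRQ resources omitted for brevity */
};

Either platdata binds the same driver; probe() then selects the code path from pdata->on_chip at runtime instead of via a compile-time #ifdef.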
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-se/7724/setup.c | 1 + arch/sh/kernel/cpu/sh4a/setup-sh7366.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7723.c | 2 +- drivers/usb/host/Kconfig | 7 -- drivers/usb/host/r8a66597-hcd.c | 187 +++++++++++++++++++-------------- drivers/usb/host/r8a66597.h | 76 ++++++-------- include/linux/usb/r8a66597.h | 3 + 7 files changed, 147 insertions(+), 131 deletions(-) (limited to 'include') diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 8fed45a2fb85..4fb7e48e2843 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -304,6 +304,7 @@ static struct platform_device sh_eth_device = { }; static struct r8a66597_platdata sh7724_usb0_host_data = { + .on_chip = 1, }; static struct resource sh7724_usb0_host_resources[] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index c18f7d09281b..f6d208813564 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -40,7 +40,7 @@ static struct platform_device iic_device = { }; static struct r8a66597_platdata r8a66597_data = { - /* This set zero to all members */ + .on_chip = 1, }; static struct resource usb_host_resources[] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c index e1bb80b2a27b..28516499a2c4 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c @@ -398,7 +398,7 @@ static struct platform_device rtc_device = { }; static struct r8a66597_platdata r8a66597_data = { - /* This set zero to all members */ + .on_chip = 1, }; static struct resource sh7723_usb_host_resources[] = { diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 1a920c70b5a1..f21ca7d27a43 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -336,13 +336,6 @@ config USB_R8A66597_HCD To compile this driver as a module, choose M here: the module will be called r8a66597-hcd. -config SUPERH_ON_CHIP_R8A66597 - boolean "Enable SuperH on-chip R8A66597 USB" - depends on USB_R8A66597_HCD && (CPU_SUBTYPE_SH7366 || CPU_SUBTYPE_SH7723 || CPU_SUBTYPE_SH7724) - help - This driver enables support for the on-chip R8A66597 in the - SH7366, SH7723 and SH7724 processors. 
- config USB_WHCI_HCD tristate "Wireless USB Host Controller Interface (WHCI) driver (EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 09895a97c10b..82dce3e0d4d7 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -91,43 +91,43 @@ static int r8a66597_clock_enable(struct r8a66597 *r8a66597) u16 tmp; int i = 0; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#if defined(CONFIG_HAVE_CLK) - clk_enable(r8a66597->clk); + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + clk_enable(r8a66597->clk); #endif - do { - r8a66597_write(r8a66597, SCKE, SYSCFG0); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 1000) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & SCKE) != SCKE); - r8a66597_write(r8a66597, 0x04, 0x02); -#else - do { - r8a66597_write(r8a66597, USBE, SYSCFG0); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 1000) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & USBE) != USBE); - r8a66597_bclr(r8a66597, USBE, SYSCFG0); - r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), XTAL, - SYSCFG0); + do { + r8a66597_write(r8a66597, SCKE, SYSCFG0); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 1000) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & SCKE) != SCKE); + r8a66597_write(r8a66597, 0x04, 0x02); + } else { + do { + r8a66597_write(r8a66597, USBE, SYSCFG0); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 1000) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & USBE) != USBE); + r8a66597_bclr(r8a66597, USBE, SYSCFG0); + r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), + XTAL, SYSCFG0); - i = 0; - r8a66597_bset(r8a66597, XCKE, SYSCFG0); - do { - msleep(1); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 500) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & SCKE) != SCKE); -#endif /* #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) */ + i = 0; + r8a66597_bset(r8a66597, XCKE, SYSCFG0); + do { + msleep(1); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 500) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & SCKE) != SCKE); + } return 0; } @@ -136,15 +136,16 @@ static void r8a66597_clock_disable(struct r8a66597 *r8a66597) { r8a66597_bclr(r8a66597, SCKE, SYSCFG0); udelay(1); -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#if defined(CONFIG_HAVE_CLK) - clk_disable(r8a66597->clk); -#endif -#else - r8a66597_bclr(r8a66597, PLLC, SYSCFG0); - r8a66597_bclr(r8a66597, XCKE, SYSCFG0); - r8a66597_bclr(r8a66597, USBE, SYSCFG0); + + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + clk_disable(r8a66597->clk); #endif + } else { + r8a66597_bclr(r8a66597, PLLC, SYSCFG0); + r8a66597_bclr(r8a66597, XCKE, SYSCFG0); + r8a66597_bclr(r8a66597, USBE, SYSCFG0); + } } static void r8a66597_enable_port(struct r8a66597 *r8a66597, int port) @@ -205,7 +206,7 @@ static int enable_controller(struct r8a66597 *r8a66597) r8a66597_bset(r8a66597, SIGNE | SACKE, INTENB1); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_enable_port(r8a66597, port); return 0; @@ -218,7 +219,7 @@ static void disable_controller(struct r8a66597 *r8a66597) r8a66597_write(r8a66597, 0, INTENB0); r8a66597_write(r8a66597, 0, INTSTS0); - for (port = 0; port 
< R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_disable_port(r8a66597, port); r8a66597_clock_disable(r8a66597); @@ -249,11 +250,12 @@ static int is_hub_limit(char *devpath) return ((strlen(devpath) >= 4) ? 1 : 0); } -static void get_port_number(char *devpath, u16 *root_port, u16 *hub_port) +static void get_port_number(struct r8a66597 *r8a66597, + char *devpath, u16 *root_port, u16 *hub_port) { if (root_port) { *root_port = (devpath[0] & 0x0F) - 1; - if (*root_port >= R8A66597_MAX_ROOT_HUB) + if (*root_port >= r8a66597->max_root_hub) printk(KERN_ERR "r8a66597: Illegal root port number.\n"); } if (hub_port) @@ -355,7 +357,8 @@ static int make_r8a66597_device(struct r8a66597 *r8a66597, INIT_LIST_HEAD(&dev->device_list); list_add_tail(&dev->device_list, &r8a66597->child_device); - get_port_number(urb->dev->devpath, &dev->root_port, &dev->hub_port); + get_port_number(r8a66597, urb->dev->devpath, + &dev->root_port, &dev->hub_port); if (!is_child_device(urb->dev->devpath)) r8a66597->root_hub[dev->root_port].dev = dev; @@ -420,7 +423,7 @@ static void free_usb_address(struct r8a66597 *r8a66597, list_del(&dev->device_list); kfree(dev); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { if (r8a66597->root_hub[port].dev == dev) { r8a66597->root_hub[port].dev = NULL; break; @@ -495,10 +498,20 @@ static void r8a66597_pipe_toggle(struct r8a66597 *r8a66597, r8a66597_bset(r8a66597, SQCLR, pipe->pipectr); } +static inline unsigned short mbw_value(struct r8a66597 *r8a66597) +{ + if (r8a66597->pdata->on_chip) + return MBW_32; + else + return MBW_16; +} + /* this function must be called with interrupt disabled */ static inline void cfifo_change(struct r8a66597 *r8a66597, u16 pipenum) { - r8a66597_mdfy(r8a66597, MBW | pipenum, MBW | CURPIPE, CFIFOSEL); + unsigned short mbw = mbw_value(r8a66597); + + r8a66597_mdfy(r8a66597, mbw | pipenum, mbw | CURPIPE, CFIFOSEL); r8a66597_reg_wait(r8a66597, CFIFOSEL, CURPIPE, pipenum); } @@ -506,11 +519,13 @@ static inline void cfifo_change(struct r8a66597 *r8a66597, u16 pipenum) static inline void fifo_change_from_pipe(struct r8a66597 *r8a66597, struct r8a66597_pipe *pipe) { + unsigned short mbw = mbw_value(r8a66597); + cfifo_change(r8a66597, 0); - r8a66597_mdfy(r8a66597, MBW | 0, MBW | CURPIPE, D0FIFOSEL); - r8a66597_mdfy(r8a66597, MBW | 0, MBW | CURPIPE, D1FIFOSEL); + r8a66597_mdfy(r8a66597, mbw | 0, mbw | CURPIPE, D0FIFOSEL); + r8a66597_mdfy(r8a66597, mbw | 0, mbw | CURPIPE, D1FIFOSEL); - r8a66597_mdfy(r8a66597, MBW | pipe->info.pipenum, MBW | CURPIPE, + r8a66597_mdfy(r8a66597, mbw | pipe->info.pipenum, mbw | CURPIPE, pipe->fifosel); r8a66597_reg_wait(r8a66597, pipe->fifosel, CURPIPE, pipe->info.pipenum); } @@ -742,9 +757,13 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, struct r8a66597_pipe *pipe, struct urb *urb) { -#if !defined(CONFIG_SUPERH_ON_CHIP_R8A66597) int i; struct r8a66597_pipe_info *info = &pipe->info; + unsigned short mbw = mbw_value(r8a66597); + + /* pipe dma is only for external controlles */ + if (r8a66597->pdata->on_chip) + return; if ((pipe->info.pipenum != 0) && (info->type != R8A66597_INT)) { for (i = 0; i < R8A66597_MAX_DMA_CHANNEL; i++) { @@ -763,8 +782,8 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, set_pipe_reg_addr(pipe, i); cfifo_change(r8a66597, 0); - r8a66597_mdfy(r8a66597, MBW | pipe->info.pipenum, - MBW | CURPIPE, pipe->fifosel); + r8a66597_mdfy(r8a66597, mbw | pipe->info.pipenum, + mbw | 
CURPIPE, pipe->fifosel); r8a66597_reg_wait(r8a66597, pipe->fifosel, CURPIPE, pipe->info.pipenum); @@ -772,7 +791,6 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, break; } } -#endif /* #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) */ } /* this function must be called with interrupt disabled */ @@ -1769,7 +1787,7 @@ static void r8a66597_timer(unsigned long _r8a66597) spin_lock_irqsave(&r8a66597->lock, flags); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_root_hub_control(r8a66597, port); spin_unlock_irqrestore(&r8a66597->lock, flags); @@ -1807,7 +1825,7 @@ static void set_address_zero(struct r8a66597 *r8a66597, struct urb *urb) u16 root_port, hub_port; if (usb_address == 0) { - get_port_number(urb->dev->devpath, + get_port_number(r8a66597, urb->dev->devpath, &root_port, &hub_port); set_devadd_reg(r8a66597, 0, get_r8a66597_usb_speed(urb->dev->speed), @@ -2082,7 +2100,7 @@ static int r8a66597_hub_status_data(struct usb_hcd *hcd, char *buf) *buf = 0; /* initialize (no change) */ - for (i = 0; i < R8A66597_MAX_ROOT_HUB; i++) { + for (i = 0; i < r8a66597->max_root_hub; i++) { if (r8a66597->root_hub[i].port & 0xffff0000) *buf |= 1 << (i + 1); } @@ -2097,11 +2115,11 @@ static void r8a66597_hub_descriptor(struct r8a66597 *r8a66597, { desc->bDescriptorType = 0x29; desc->bHubContrCurrent = 0; - desc->bNbrPorts = R8A66597_MAX_ROOT_HUB; + desc->bNbrPorts = r8a66597->max_root_hub; desc->bDescLength = 9; desc->bPwrOn2PwrGood = 0; desc->wHubCharacteristics = cpu_to_le16(0x0011); - desc->bitmap[0] = ((1 << R8A66597_MAX_ROOT_HUB) - 1) << 1; + desc->bitmap[0] = ((1 << r8a66597->max_root_hub) - 1) << 1; desc->bitmap[1] = ~0; } @@ -2129,7 +2147,7 @@ static int r8a66597_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } break; case ClearPortFeature: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; if (wLength != 0) goto error; @@ -2162,12 +2180,12 @@ static int r8a66597_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, *buf = 0x00; break; case GetPortStatus: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; *(__le32 *)buf = cpu_to_le32(rh->port); break; case SetPortFeature: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; if (wLength != 0) goto error; @@ -2216,7 +2234,7 @@ static int r8a66597_bus_suspend(struct usb_hcd *hcd) dbg("%s", __func__); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; unsigned long dvstctr_reg = get_dvstctr_reg(port); @@ -2247,7 +2265,7 @@ static int r8a66597_bus_resume(struct usb_hcd *hcd) dbg("%s", __func__); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; unsigned long dvstctr_reg = get_dvstctr_reg(port); @@ -2314,7 +2332,7 @@ static int r8a66597_suspend(struct device *dev) disable_controller(r8a66597); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; rh->port = 0x00000000; @@ -2354,8 +2372,9 @@ static int __init_or_module r8a66597_remove(struct platform_device *pdev) del_timer_sync(&r8a66597->rh_timer); usb_remove_hcd(hcd); iounmap((void *)r8a66597->reg); -#if 
defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - clk_put(r8a66597->clk); +#ifdef CONFIG_HAVE_CLK + if (r8a66597->pdata->on_chip) + clk_put(r8a66597->clk); #endif usb_put_hcd(hcd); return 0; @@ -2363,7 +2382,7 @@ static int __init_or_module r8a66597_remove(struct platform_device *pdev) static int __devinit r8a66597_probe(struct platform_device *pdev) { -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK char clk_name[8]; #endif struct resource *res = NULL, *ires; @@ -2425,15 +2444,20 @@ static int __devinit r8a66597_probe(struct platform_device *pdev) r8a66597->pdata = pdev->dev.platform_data; r8a66597->irq_sense_low = irq_trigger == IRQF_TRIGGER_LOW; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id); - r8a66597->clk = clk_get(&pdev->dev, clk_name); - if (IS_ERR(r8a66597->clk)) { - dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); - ret = PTR_ERR(r8a66597->clk); - goto clean_up2; - } + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id); + r8a66597->clk = clk_get(&pdev->dev, clk_name); + if (IS_ERR(r8a66597->clk)) { + dev_err(&pdev->dev, "cannot get clock \"%s\"\n", + clk_name); + ret = PTR_ERR(r8a66597->clk); + goto clean_up2; + } #endif + r8a66597->max_root_hub = 1; + } else + r8a66597->max_root_hub = 2; spin_lock_init(&r8a66597->lock); init_timer(&r8a66597->rh_timer); @@ -2463,8 +2487,9 @@ static int __devinit r8a66597_probe(struct platform_device *pdev) return 0; clean_up3: -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - clk_put(r8a66597->clk); +#ifdef CONFIG_HAVE_CLK + if (r8a66597->pdata->on_chip) + clk_put(r8a66597->clk); clean_up2: #endif usb_put_hcd(hcd); diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h index d72680b433f9..eecbd917bc81 100644 --- a/drivers/usb/host/r8a66597.h +++ b/drivers/usb/host/r8a66597.h @@ -26,7 +26,7 @@ #ifndef __R8A66597_H__ #define __R8A66597_H__ -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK #include #endif @@ -193,13 +193,9 @@ #define REW 0x4000 /* b14: Buffer rewind */ #define DCLRM 0x2000 /* b13: DMA buffer clear mode */ #define DREQE 0x1000 /* b12: DREQ output enable */ -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#define MBW 0x0800 -#else -#define MBW 0x0400 /* b10: Maximum bit width for FIFO access */ -#endif #define MBW_8 0x0000 /* 8bit */ #define MBW_16 0x0400 /* 16bit */ +#define MBW_32 0x0800 /* 32bit */ #define BIGEND 0x0100 /* b8: Big endian mode */ #define BYTE_LITTLE 0x0000 /* little dendian */ #define BYTE_BIG 0x0100 /* big endifan */ @@ -405,11 +401,7 @@ #define R8A66597_MAX_NUM_PIPE 10 #define R8A66597_BUF_BSIZE 8 #define R8A66597_MAX_DEVICE 10 -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#define R8A66597_MAX_ROOT_HUB 1 -#else #define R8A66597_MAX_ROOT_HUB 2 -#endif #define R8A66597_MAX_SAMPLING 5 #define R8A66597_RH_POLL_TIME 10 #define R8A66597_MAX_DMA_CHANNEL 2 @@ -487,7 +479,7 @@ struct r8a66597_root_hub { struct r8a66597 { spinlock_t lock; unsigned long reg; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK struct clk *clk; #endif struct r8a66597_platdata *pdata; @@ -504,6 +496,7 @@ struct r8a66597 { unsigned short interval_map; unsigned char pipe_cnt[R8A66597_MAX_NUM_PIPE]; unsigned char dma_map; + unsigned int max_root_hub; struct list_head child_device; unsigned long 
child_connect_map[4]; @@ -550,21 +543,22 @@ static inline void r8a66597_read_fifo(struct r8a66597 *r8a66597, unsigned long offset, u16 *buf, int len) { -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) unsigned long fifoaddr = r8a66597->reg + offset; unsigned long count; - count = len / 4; - insl(fifoaddr, buf, count); + if (r8a66597->pdata->on_chip) { + count = len / 4; + insl(fifoaddr, buf, count); - if (len & 0x00000003) { - unsigned long tmp = inl(fifoaddr); - memcpy((unsigned char *)buf + count * 4, &tmp, len & 0x03); + if (len & 0x00000003) { + unsigned long tmp = inl(fifoaddr); + memcpy((unsigned char *)buf + count * 4, &tmp, + len & 0x03); + } + } else { + len = (len + 1) / 2; + insw(fifoaddr, buf, len); } -#else - len = (len + 1) / 2; - insw(r8a66597->reg + offset, buf, len); -#endif } static inline void r8a66597_write(struct r8a66597 *r8a66597, u16 val, @@ -578,33 +572,33 @@ static inline void r8a66597_write_fifo(struct r8a66597 *r8a66597, int len) { unsigned long fifoaddr = r8a66597->reg + offset; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) unsigned long count; unsigned char *pb; int i; - count = len / 4; - outsl(fifoaddr, buf, count); + if (r8a66597->pdata->on_chip) { + count = len / 4; + outsl(fifoaddr, buf, count); + + if (len & 0x00000003) { + pb = (unsigned char *)buf + count * 4; + for (i = 0; i < (len & 0x00000003); i++) { + if (r8a66597_read(r8a66597, CFIFOSEL) & BIGEND) + outb(pb[i], fifoaddr + i); + else + outb(pb[i], fifoaddr + 3 - i); + } + } + } else { + int odd = len & 0x0001; - if (len & 0x00000003) { - pb = (unsigned char *)buf + count * 4; - for (i = 0; i < (len & 0x00000003); i++) { - if (r8a66597_read(r8a66597, CFIFOSEL) & BIGEND) - outb(pb[i], fifoaddr + i); - else - outb(pb[i], fifoaddr + 3 - i); + len = len / 2; + outsw(fifoaddr, buf, len); + if (unlikely(odd)) { + buf = &buf[len]; + outb((unsigned char)*buf, fifoaddr); } } -#else - int odd = len & 0x0001; - - len = len / 2; - outsw(fifoaddr, buf, len); - if (unlikely(odd)) { - buf = &buf[len]; - outb((unsigned char)*buf, fifoaddr); - } -#endif } static inline void r8a66597_mdfy(struct r8a66597 *r8a66597, diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index e9f0384fa20c..460ee3f6a2c6 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -31,6 +31,9 @@ struct r8a66597_platdata { /* This ops can controll port power instead of DVSTCTR register. */ void (*port_power)(int port, int power); + /* set one = on chip controller, set zero = external controller */ + unsigned on_chip:1; + /* (external controller only) set R8A66597_PLATDATA_XTAL_nnMHZ */ unsigned xtal:2; -- cgit v1.2.3 From fbd90375d7531927d312766b548376d909811b4d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 13:40:14 +0200 Subject: hrtimer: Remove cb_entry from struct hrtimer It's unused, remove it. 
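[Editor's note: cb_entry was private to the hrtimer core (a leftover of the removed callback-list machinery), so dropping it shrinks struct hrtimer without affecting users. As a reminder of the public surface, a minimal, illustrative hrtimer user of this era looks like:]

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;	/* hypothetical example timer */

static enum hrtimer_restart demo_timer_fn(struct hrtimer *timer)
{
	/* expiry work goes here; return HRTIMER_RESTART to re-arm */
	return HRTIMER_NORESTART;
}

static void demo_timer_arm(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;
	/* fire once, 100ms from now */
	hrtimer_start(&demo_timer, ktime_set(0, 100 * NSEC_PER_MSEC),
		      HRTIMER_MODE_REL);
}

Clients only assign .function and go through the init/start interface, which is why removing a private list head is invisible to them.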
Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner LKML-Reference: --- include/linux/hrtimer.h | 2 -- kernel/hrtimer.c | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 54648e625efd..40e7d54fc424 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -91,7 +91,6 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) - * @cb_entry: list head to enqueue an expired timer into the callback list * @start_site: timer statistics field to store the site where the timer * was started * @start_comm: timer statistics field to store the name of the process which @@ -108,7 +107,6 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; - struct list_head cb_entry; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 43d151f185b6..052a0f53e4eb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1092,7 +1092,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; - INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS -- cgit v1.2.3 From cf4f1e76c49dacfde0680b170b9a9b6a42f296bb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Jul 2009 14:32:03 +0000 Subject: usb: move r8a66597 register defines Move r8a66597 hardware register definitions from the host controller header file to the platform data header file. With this change in place we can easily share register definitions between the host controller driver and a future gadget driver. 
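[Editor's note: an illustrative consequence of the move, not code from the patch. Once the register offsets and bit masks live in <linux/usb/r8a66597.h>, any code that maps the controller can use them directly; the real drivers wrap such accesses in their r8a66597_read()/r8a66597_write() helpers:]

#include <linux/io.h>
#include <linux/usb/r8a66597.h>

/* true if the USB clock is running, using the shared SYSCFG0/SCKE bits */
static bool usb_clock_is_running(void __iomem *base)
{
	return (ioread16(base + SYSCFG0) & SCKE) != 0;
}

A future gadget driver for the same silicon then gets these names by including one header instead of carrying a private copy of the ~370-line register block.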
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/usb/host/r8a66597.h | 366 ------------------------------------------ include/linux/usb/r8a66597.h | 372 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 370 insertions(+), 368 deletions(-) (limited to 'include') diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h index eecbd917bc81..228e3fb23854 100644 --- a/drivers/usb/host/r8a66597.h +++ b/drivers/usb/host/r8a66597.h @@ -32,372 +32,6 @@ #include -#define SYSCFG0 0x00 -#define SYSCFG1 0x02 -#define SYSSTS0 0x04 -#define SYSSTS1 0x06 -#define DVSTCTR0 0x08 -#define DVSTCTR1 0x0A -#define TESTMODE 0x0C -#define PINCFG 0x0E -#define DMA0CFG 0x10 -#define DMA1CFG 0x12 -#define CFIFO 0x14 -#define D0FIFO 0x18 -#define D1FIFO 0x1C -#define CFIFOSEL 0x20 -#define CFIFOCTR 0x22 -#define CFIFOSIE 0x24 -#define D0FIFOSEL 0x28 -#define D0FIFOCTR 0x2A -#define D1FIFOSEL 0x2C -#define D1FIFOCTR 0x2E -#define INTENB0 0x30 -#define INTENB1 0x32 -#define INTENB2 0x34 -#define BRDYENB 0x36 -#define NRDYENB 0x38 -#define BEMPENB 0x3A -#define SOFCFG 0x3C -#define INTSTS0 0x40 -#define INTSTS1 0x42 -#define INTSTS2 0x44 -#define BRDYSTS 0x46 -#define NRDYSTS 0x48 -#define BEMPSTS 0x4A -#define FRMNUM 0x4C -#define UFRMNUM 0x4E -#define USBADDR 0x50 -#define USBREQ 0x54 -#define USBVAL 0x56 -#define USBINDX 0x58 -#define USBLENG 0x5A -#define DCPCFG 0x5C -#define DCPMAXP 0x5E -#define DCPCTR 0x60 -#define PIPESEL 0x64 -#define PIPECFG 0x68 -#define PIPEBUF 0x6A -#define PIPEMAXP 0x6C -#define PIPEPERI 0x6E -#define PIPE1CTR 0x70 -#define PIPE2CTR 0x72 -#define PIPE3CTR 0x74 -#define PIPE4CTR 0x76 -#define PIPE5CTR 0x78 -#define PIPE6CTR 0x7A -#define PIPE7CTR 0x7C -#define PIPE8CTR 0x7E -#define PIPE9CTR 0x80 -#define PIPE1TRE 0x90 -#define PIPE1TRN 0x92 -#define PIPE2TRE 0x94 -#define PIPE2TRN 0x96 -#define PIPE3TRE 0x98 -#define PIPE3TRN 0x9A -#define PIPE4TRE 0x9C -#define PIPE4TRN 0x9E -#define PIPE5TRE 0xA0 -#define PIPE5TRN 0xA2 -#define DEVADD0 0xD0 -#define DEVADD1 0xD2 -#define DEVADD2 0xD4 -#define DEVADD3 0xD6 -#define DEVADD4 0xD8 -#define DEVADD5 0xDA -#define DEVADD6 0xDC -#define DEVADD7 0xDE -#define DEVADD8 0xE0 -#define DEVADD9 0xE2 -#define DEVADDA 0xE4 - -/* System Configuration Control Register */ -#define XTAL 0xC000 /* b15-14: Crystal selection */ -#define XTAL48 0x8000 /* 48MHz */ -#define XTAL24 0x4000 /* 24MHz */ -#define XTAL12 0x0000 /* 12MHz */ -#define XCKE 0x2000 /* b13: External clock enable */ -#define PLLC 0x0800 /* b11: PLL control */ -#define SCKE 0x0400 /* b10: USB clock enable */ -#define PCSDIS 0x0200 /* b9: not CS wakeup */ -#define LPSME 0x0100 /* b8: Low power sleep mode */ -#define HSE 0x0080 /* b7: Hi-speed enable */ -#define DCFM 0x0040 /* b6: Controller function select */ -#define DRPD 0x0020 /* b5: D+/- pull down control */ -#define DPRPU 0x0010 /* b4: D+ pull up control */ -#define USBE 0x0001 /* b0: USB module operation enable */ - -/* System Configuration Status Register */ -#define OVCBIT 0x8000 /* b15-14: Over-current bit */ -#define OVCMON 0xC000 /* b15-14: Over-current monitor */ -#define SOFEA 0x0020 /* b5: SOF monitor */ -#define IDMON 0x0004 /* b3: ID-pin monitor */ -#define LNST 0x0003 /* b1-0: D+, D- line status */ -#define SE1 0x0003 /* SE1 */ -#define FS_KSTS 0x0002 /* Full-Speed K State */ -#define FS_JSTS 0x0001 /* Full-Speed J State */ -#define LS_JSTS 0x0002 /* Low-Speed J State */ -#define LS_KSTS 0x0001 /* Low-Speed K State */ -#define SE0 0x0000 /* SE0 */ - -/* Device State Control Register */ 
-#define EXTLP0 0x0400 /* b10: External port */ -#define VBOUT 0x0200 /* b9: VBUS output */ -#define WKUP 0x0100 /* b8: Remote wakeup */ -#define RWUPE 0x0080 /* b7: Remote wakeup sense */ -#define USBRST 0x0040 /* b6: USB reset enable */ -#define RESUME 0x0020 /* b5: Resume enable */ -#define UACT 0x0010 /* b4: USB bus enable */ -#define RHST 0x0007 /* b1-0: Reset handshake status */ -#define HSPROC 0x0004 /* HS handshake is processing */ -#define HSMODE 0x0003 /* Hi-Speed mode */ -#define FSMODE 0x0002 /* Full-Speed mode */ -#define LSMODE 0x0001 /* Low-Speed mode */ -#define UNDECID 0x0000 /* Undecided */ - -/* Test Mode Register */ -#define UTST 0x000F /* b3-0: Test select */ -#define H_TST_PACKET 0x000C /* HOST TEST Packet */ -#define H_TST_SE0_NAK 0x000B /* HOST TEST SE0 NAK */ -#define H_TST_K 0x000A /* HOST TEST K */ -#define H_TST_J 0x0009 /* HOST TEST J */ -#define H_TST_NORMAL 0x0000 /* HOST Normal Mode */ -#define P_TST_PACKET 0x0004 /* PERI TEST Packet */ -#define P_TST_SE0_NAK 0x0003 /* PERI TEST SE0 NAK */ -#define P_TST_K 0x0002 /* PERI TEST K */ -#define P_TST_J 0x0001 /* PERI TEST J */ -#define P_TST_NORMAL 0x0000 /* PERI Normal Mode */ - -/* Data Pin Configuration Register */ -#define LDRV 0x8000 /* b15: Drive Current Adjust */ -#define VIF1 0x0000 /* VIF = 1.8V */ -#define VIF3 0x8000 /* VIF = 3.3V */ -#define INTA 0x0001 /* b1: USB INT-pin active */ - -/* DMAx Pin Configuration Register */ -#define DREQA 0x4000 /* b14: Dreq active select */ -#define BURST 0x2000 /* b13: Burst mode */ -#define DACKA 0x0400 /* b10: Dack active select */ -#define DFORM 0x0380 /* b9-7: DMA mode select */ -#define CPU_ADR_RD_WR 0x0000 /* Address + RD/WR mode (CPU bus) */ -#define CPU_DACK_RD_WR 0x0100 /* DACK + RD/WR mode (CPU bus) */ -#define CPU_DACK_ONLY 0x0180 /* DACK only mode (CPU bus) */ -#define SPLIT_DACK_ONLY 0x0200 /* DACK only mode (SPLIT bus) */ -#define DENDA 0x0040 /* b6: Dend active select */ -#define PKTM 0x0020 /* b5: Packet mode */ -#define DENDE 0x0010 /* b4: Dend enable */ -#define OBUS 0x0004 /* b2: OUTbus mode */ - -/* CFIFO/DxFIFO Port Select Register */ -#define RCNT 0x8000 /* b15: Read count mode */ -#define REW 0x4000 /* b14: Buffer rewind */ -#define DCLRM 0x2000 /* b13: DMA buffer clear mode */ -#define DREQE 0x1000 /* b12: DREQ output enable */ -#define MBW_8 0x0000 /* 8bit */ -#define MBW_16 0x0400 /* 16bit */ -#define MBW_32 0x0800 /* 32bit */ -#define BIGEND 0x0100 /* b8: Big endian mode */ -#define BYTE_LITTLE 0x0000 /* little dendian */ -#define BYTE_BIG 0x0100 /* big endifan */ -#define ISEL 0x0020 /* b5: DCP FIFO port direction select */ -#define CURPIPE 0x000F /* b2-0: PIPE select */ - -/* CFIFO/DxFIFO Port Control Register */ -#define BVAL 0x8000 /* b15: Buffer valid flag */ -#define BCLR 0x4000 /* b14: Buffer clear */ -#define FRDY 0x2000 /* b13: FIFO ready */ -#define DTLN 0x0FFF /* b11-0: FIFO received data length */ - -/* Interrupt Enable Register 0 */ -#define VBSE 0x8000 /* b15: VBUS interrupt */ -#define RSME 0x4000 /* b14: Resume interrupt */ -#define SOFE 0x2000 /* b13: Frame update interrupt */ -#define DVSE 0x1000 /* b12: Device state transition interrupt */ -#define CTRE 0x0800 /* b11: Control transfer stage transition interrupt */ -#define BEMPE 0x0400 /* b10: Buffer empty interrupt */ -#define NRDYE 0x0200 /* b9: Buffer not ready interrupt */ -#define BRDYE 0x0100 /* b8: Buffer ready interrupt */ - -/* Interrupt Enable Register 1 */ -#define OVRCRE 0x8000 /* b15: Over-current interrupt */ -#define BCHGE 0x4000 /* b14: USB us chenge 
interrupt */ -#define DTCHE 0x1000 /* b12: Detach sense interrupt */ -#define ATTCHE 0x0800 /* b11: Attach sense interrupt */ -#define EOFERRE 0x0040 /* b6: EOF error interrupt */ -#define SIGNE 0x0020 /* b5: SETUP IGNORE interrupt */ -#define SACKE 0x0010 /* b4: SETUP ACK interrupt */ - -/* BRDY Interrupt Enable/Status Register */ -#define BRDY9 0x0200 /* b9: PIPE9 */ -#define BRDY8 0x0100 /* b8: PIPE8 */ -#define BRDY7 0x0080 /* b7: PIPE7 */ -#define BRDY6 0x0040 /* b6: PIPE6 */ -#define BRDY5 0x0020 /* b5: PIPE5 */ -#define BRDY4 0x0010 /* b4: PIPE4 */ -#define BRDY3 0x0008 /* b3: PIPE3 */ -#define BRDY2 0x0004 /* b2: PIPE2 */ -#define BRDY1 0x0002 /* b1: PIPE1 */ -#define BRDY0 0x0001 /* b1: PIPE0 */ - -/* NRDY Interrupt Enable/Status Register */ -#define NRDY9 0x0200 /* b9: PIPE9 */ -#define NRDY8 0x0100 /* b8: PIPE8 */ -#define NRDY7 0x0080 /* b7: PIPE7 */ -#define NRDY6 0x0040 /* b6: PIPE6 */ -#define NRDY5 0x0020 /* b5: PIPE5 */ -#define NRDY4 0x0010 /* b4: PIPE4 */ -#define NRDY3 0x0008 /* b3: PIPE3 */ -#define NRDY2 0x0004 /* b2: PIPE2 */ -#define NRDY1 0x0002 /* b1: PIPE1 */ -#define NRDY0 0x0001 /* b1: PIPE0 */ - -/* BEMP Interrupt Enable/Status Register */ -#define BEMP9 0x0200 /* b9: PIPE9 */ -#define BEMP8 0x0100 /* b8: PIPE8 */ -#define BEMP7 0x0080 /* b7: PIPE7 */ -#define BEMP6 0x0040 /* b6: PIPE6 */ -#define BEMP5 0x0020 /* b5: PIPE5 */ -#define BEMP4 0x0010 /* b4: PIPE4 */ -#define BEMP3 0x0008 /* b3: PIPE3 */ -#define BEMP2 0x0004 /* b2: PIPE2 */ -#define BEMP1 0x0002 /* b1: PIPE1 */ -#define BEMP0 0x0001 /* b0: PIPE0 */ - -/* SOF Pin Configuration Register */ -#define TRNENSEL 0x0100 /* b8: Select transaction enable period */ -#define BRDYM 0x0040 /* b6: BRDY clear timing */ -#define INTL 0x0020 /* b5: Interrupt sense select */ -#define EDGESTS 0x0010 /* b4: */ -#define SOFMODE 0x000C /* b3-2: SOF pin select */ -#define SOF_125US 0x0008 /* SOF OUT 125us Frame Signal */ -#define SOF_1MS 0x0004 /* SOF OUT 1ms Frame Signal */ -#define SOF_DISABLE 0x0000 /* SOF OUT Disable */ - -/* Interrupt Status Register 0 */ -#define VBINT 0x8000 /* b15: VBUS interrupt */ -#define RESM 0x4000 /* b14: Resume interrupt */ -#define SOFR 0x2000 /* b13: SOF frame update interrupt */ -#define DVST 0x1000 /* b12: Device state transition interrupt */ -#define CTRT 0x0800 /* b11: Control transfer stage transition interrupt */ -#define BEMP 0x0400 /* b10: Buffer empty interrupt */ -#define NRDY 0x0200 /* b9: Buffer not ready interrupt */ -#define BRDY 0x0100 /* b8: Buffer ready interrupt */ -#define VBSTS 0x0080 /* b7: VBUS input port */ -#define DVSQ 0x0070 /* b6-4: Device state */ -#define DS_SPD_CNFG 0x0070 /* Suspend Configured */ -#define DS_SPD_ADDR 0x0060 /* Suspend Address */ -#define DS_SPD_DFLT 0x0050 /* Suspend Default */ -#define DS_SPD_POWR 0x0040 /* Suspend Powered */ -#define DS_SUSP 0x0040 /* Suspend */ -#define DS_CNFG 0x0030 /* Configured */ -#define DS_ADDS 0x0020 /* Address */ -#define DS_DFLT 0x0010 /* Default */ -#define DS_POWR 0x0000 /* Powered */ -#define DVSQS 0x0030 /* b5-4: Device state */ -#define VALID 0x0008 /* b3: Setup packet detected flag */ -#define CTSQ 0x0007 /* b2-0: Control transfer stage */ -#define CS_SQER 0x0006 /* Sequence error */ -#define CS_WRND 0x0005 /* Control write nodata status stage */ -#define CS_WRSS 0x0004 /* Control write status stage */ -#define CS_WRDS 0x0003 /* Control write data stage */ -#define CS_RDSS 0x0002 /* Control read status stage */ -#define CS_RDDS 0x0001 /* Control read data stage */ -#define CS_IDST 0x0000 /* Idle or setup 
stage */ - -/* Interrupt Status Register 1 */ -#define OVRCR 0x8000 /* b15: Over-current interrupt */ -#define BCHG 0x4000 /* b14: USB bus chenge interrupt */ -#define DTCH 0x1000 /* b12: Detach sense interrupt */ -#define ATTCH 0x0800 /* b11: Attach sense interrupt */ -#define EOFERR 0x0040 /* b6: EOF-error interrupt */ -#define SIGN 0x0020 /* b5: Setup ignore interrupt */ -#define SACK 0x0010 /* b4: Setup acknowledge interrupt */ - -/* Frame Number Register */ -#define OVRN 0x8000 /* b15: Overrun error */ -#define CRCE 0x4000 /* b14: Received data error */ -#define FRNM 0x07FF /* b10-0: Frame number */ - -/* Micro Frame Number Register */ -#define UFRNM 0x0007 /* b2-0: Micro frame number */ - -/* Default Control Pipe Maxpacket Size Register */ -/* Pipe Maxpacket Size Register */ -#define DEVSEL 0xF000 /* b15-14: Device address select */ -#define MAXP 0x007F /* b6-0: Maxpacket size of default control pipe */ - -/* Default Control Pipe Control Register */ -#define BSTS 0x8000 /* b15: Buffer status */ -#define SUREQ 0x4000 /* b14: Send USB request */ -#define CSCLR 0x2000 /* b13: complete-split status clear */ -#define CSSTS 0x1000 /* b12: complete-split status */ -#define SUREQCLR 0x0800 /* b11: stop setup request */ -#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ -#define SQSET 0x0080 /* b7: Sequence toggle bit set */ -#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ -#define PBUSY 0x0020 /* b5: pipe busy */ -#define PINGE 0x0010 /* b4: ping enable */ -#define CCPL 0x0004 /* b2: Enable control transfer complete */ -#define PID 0x0003 /* b1-0: Response PID */ -#define PID_STALL11 0x0003 /* STALL */ -#define PID_STALL 0x0002 /* STALL */ -#define PID_BUF 0x0001 /* BUF */ -#define PID_NAK 0x0000 /* NAK */ - -/* Pipe Window Select Register */ -#define PIPENM 0x0007 /* b2-0: Pipe select */ - -/* Pipe Configuration Register */ -#define R8A66597_TYP 0xC000 /* b15-14: Transfer type */ -#define R8A66597_ISO 0xC000 /* Isochronous */ -#define R8A66597_INT 0x8000 /* Interrupt */ -#define R8A66597_BULK 0x4000 /* Bulk */ -#define R8A66597_BFRE 0x0400 /* b10: Buffer ready interrupt mode select */ -#define R8A66597_DBLB 0x0200 /* b9: Double buffer mode select */ -#define R8A66597_CNTMD 0x0100 /* b8: Continuous transfer mode select */ -#define R8A66597_SHTNAK 0x0080 /* b7: Transfer end NAK */ -#define R8A66597_DIR 0x0010 /* b4: Transfer direction select */ -#define R8A66597_EPNUM 0x000F /* b3-0: Eendpoint number select */ - -/* Pipe Buffer Configuration Register */ -#define BUFSIZE 0x7C00 /* b14-10: Pipe buffer size */ -#define BUFNMB 0x007F /* b6-0: Pipe buffer number */ -#define PIPE0BUF 256 -#define PIPExBUF 64 - -/* Pipe Maxpacket Size Register */ -#define MXPS 0x07FF /* b10-0: Maxpacket size */ - -/* Pipe Cycle Configuration Register */ -#define IFIS 0x1000 /* b12: Isochronous in-buffer flush mode select */ -#define IITV 0x0007 /* b2-0: Isochronous interval */ - -/* Pipex Control Register */ -#define BSTS 0x8000 /* b15: Buffer status */ -#define INBUFM 0x4000 /* b14: IN buffer monitor (Only for PIPE1 to 5) */ -#define CSCLR 0x2000 /* b13: complete-split status clear */ -#define CSSTS 0x1000 /* b12: complete-split status */ -#define ATREPM 0x0400 /* b10: Auto repeat mode */ -#define ACLRM 0x0200 /* b9: Out buffer auto clear mode */ -#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ -#define SQSET 0x0080 /* b7: Sequence toggle bit set */ -#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ -#define PBUSY 0x0020 /* b5: pipe busy */ -#define PID 0x0003 /* b1-0: Response 
PID */ - -/* PIPExTRE */ -#define TRENB 0x0200 /* b9: Transaction counter enable */ -#define TRCLR 0x0100 /* b8: Transaction counter clear */ - -/* PIPExTRN */ -#define TRNCNT 0xFFFF /* b15-0: Transaction counter */ - -/* DEVADDx */ -#define UPPHUB 0x7800 -#define HUBPORT 0x0700 -#define USBSPD 0x00C0 -#define RTPORT 0x0001 - #define R8A66597_MAX_NUM_PIPE 10 #define R8A66597_BUF_BSIZE 8 #define R8A66597_MAX_DEVICE 10 diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index 460ee3f6a2c6..26d216734057 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -28,7 +28,7 @@ #define R8A66597_PLATDATA_XTAL_48MHZ 0x03 struct r8a66597_platdata { - /* This ops can controll port power instead of DVSTCTR register. */ + /* This callback can control port power instead of DVSTCTR register. */ void (*port_power)(int port, int power); /* set one = on chip controller, set zero = external controller */ @@ -43,5 +43,373 @@ struct r8a66597_platdata { /* set one = big endian, set zero = little endian */ unsigned endian:1; }; -#endif + +/* Register definitions */ +#define SYSCFG0 0x00 +#define SYSCFG1 0x02 +#define SYSSTS0 0x04 +#define SYSSTS1 0x06 +#define DVSTCTR0 0x08 +#define DVSTCTR1 0x0A +#define TESTMODE 0x0C +#define PINCFG 0x0E +#define DMA0CFG 0x10 +#define DMA1CFG 0x12 +#define CFIFO 0x14 +#define D0FIFO 0x18 +#define D1FIFO 0x1C +#define CFIFOSEL 0x20 +#define CFIFOCTR 0x22 +#define CFIFOSIE 0x24 +#define D0FIFOSEL 0x28 +#define D0FIFOCTR 0x2A +#define D1FIFOSEL 0x2C +#define D1FIFOCTR 0x2E +#define INTENB0 0x30 +#define INTENB1 0x32 +#define INTENB2 0x34 +#define BRDYENB 0x36 +#define NRDYENB 0x38 +#define BEMPENB 0x3A +#define SOFCFG 0x3C +#define INTSTS0 0x40 +#define INTSTS1 0x42 +#define INTSTS2 0x44 +#define BRDYSTS 0x46 +#define NRDYSTS 0x48 +#define BEMPSTS 0x4A +#define FRMNUM 0x4C +#define UFRMNUM 0x4E +#define USBADDR 0x50 +#define USBREQ 0x54 +#define USBVAL 0x56 +#define USBINDX 0x58 +#define USBLENG 0x5A +#define DCPCFG 0x5C +#define DCPMAXP 0x5E +#define DCPCTR 0x60 +#define PIPESEL 0x64 +#define PIPECFG 0x68 +#define PIPEBUF 0x6A +#define PIPEMAXP 0x6C +#define PIPEPERI 0x6E +#define PIPE1CTR 0x70 +#define PIPE2CTR 0x72 +#define PIPE3CTR 0x74 +#define PIPE4CTR 0x76 +#define PIPE5CTR 0x78 +#define PIPE6CTR 0x7A +#define PIPE7CTR 0x7C +#define PIPE8CTR 0x7E +#define PIPE9CTR 0x80 +#define PIPE1TRE 0x90 +#define PIPE1TRN 0x92 +#define PIPE2TRE 0x94 +#define PIPE2TRN 0x96 +#define PIPE3TRE 0x98 +#define PIPE3TRN 0x9A +#define PIPE4TRE 0x9C +#define PIPE4TRN 0x9E +#define PIPE5TRE 0xA0 +#define PIPE5TRN 0xA2 +#define DEVADD0 0xD0 +#define DEVADD1 0xD2 +#define DEVADD2 0xD4 +#define DEVADD3 0xD6 +#define DEVADD4 0xD8 +#define DEVADD5 0xDA +#define DEVADD6 0xDC +#define DEVADD7 0xDE +#define DEVADD8 0xE0 +#define DEVADD9 0xE2 +#define DEVADDA 0xE4 + +/* System Configuration Control Register */ +#define XTAL 0xC000 /* b15-14: Crystal selection */ +#define XTAL48 0x8000 /* 48MHz */ +#define XTAL24 0x4000 /* 24MHz */ +#define XTAL12 0x0000 /* 12MHz */ +#define XCKE 0x2000 /* b13: External clock enable */ +#define PLLC 0x0800 /* b11: PLL control */ +#define SCKE 0x0400 /* b10: USB clock enable */ +#define PCSDIS 0x0200 /* b9: not CS wakeup */ +#define LPSME 0x0100 /* b8: Low power sleep mode */ +#define HSE 0x0080 /* b7: Hi-speed enable */ +#define DCFM 0x0040 /* b6: Controller function select */ +#define DRPD 0x0020 /* b5: D+/- pull down control */ +#define DPRPU 0x0010 /* b4: D+ pull up control */ +#define USBE 0x0001 /* b0: USB module operation 
enable */ + +/* System Configuration Status Register */ +#define OVCBIT 0x8000 /* b15-14: Over-current bit */ +#define OVCMON 0xC000 /* b15-14: Over-current monitor */ +#define SOFEA 0x0020 /* b5: SOF monitor */ +#define IDMON 0x0004 /* b3: ID-pin monitor */ +#define LNST 0x0003 /* b1-0: D+, D- line status */ +#define SE1 0x0003 /* SE1 */ +#define FS_KSTS 0x0002 /* Full-Speed K State */ +#define FS_JSTS 0x0001 /* Full-Speed J State */ +#define LS_JSTS 0x0002 /* Low-Speed J State */ +#define LS_KSTS 0x0001 /* Low-Speed K State */ +#define SE0 0x0000 /* SE0 */ + +/* Device State Control Register */ +#define EXTLP0 0x0400 /* b10: External port */ +#define VBOUT 0x0200 /* b9: VBUS output */ +#define WKUP 0x0100 /* b8: Remote wakeup */ +#define RWUPE 0x0080 /* b7: Remote wakeup sense */ +#define USBRST 0x0040 /* b6: USB reset enable */ +#define RESUME 0x0020 /* b5: Resume enable */ +#define UACT 0x0010 /* b4: USB bus enable */ +#define RHST 0x0007 /* b1-0: Reset handshake status */ +#define HSPROC 0x0004 /* HS handshake is processing */ +#define HSMODE 0x0003 /* Hi-Speed mode */ +#define FSMODE 0x0002 /* Full-Speed mode */ +#define LSMODE 0x0001 /* Low-Speed mode */ +#define UNDECID 0x0000 /* Undecided */ + +/* Test Mode Register */ +#define UTST 0x000F /* b3-0: Test select */ +#define H_TST_PACKET 0x000C /* HOST TEST Packet */ +#define H_TST_SE0_NAK 0x000B /* HOST TEST SE0 NAK */ +#define H_TST_K 0x000A /* HOST TEST K */ +#define H_TST_J 0x0009 /* HOST TEST J */ +#define H_TST_NORMAL 0x0000 /* HOST Normal Mode */ +#define P_TST_PACKET 0x0004 /* PERI TEST Packet */ +#define P_TST_SE0_NAK 0x0003 /* PERI TEST SE0 NAK */ +#define P_TST_K 0x0002 /* PERI TEST K */ +#define P_TST_J 0x0001 /* PERI TEST J */ +#define P_TST_NORMAL 0x0000 /* PERI Normal Mode */ + +/* Data Pin Configuration Register */ +#define LDRV 0x8000 /* b15: Drive Current Adjust */ +#define VIF1 0x0000 /* VIF = 1.8V */ +#define VIF3 0x8000 /* VIF = 3.3V */ +#define INTA 0x0001 /* b1: USB INT-pin active */ + +/* DMAx Pin Configuration Register */ +#define DREQA 0x4000 /* b14: Dreq active select */ +#define BURST 0x2000 /* b13: Burst mode */ +#define DACKA 0x0400 /* b10: Dack active select */ +#define DFORM 0x0380 /* b9-7: DMA mode select */ +#define CPU_ADR_RD_WR 0x0000 /* Address + RD/WR mode (CPU bus) */ +#define CPU_DACK_RD_WR 0x0100 /* DACK + RD/WR mode (CPU bus) */ +#define CPU_DACK_ONLY 0x0180 /* DACK only mode (CPU bus) */ +#define SPLIT_DACK_ONLY 0x0200 /* DACK only mode (SPLIT bus) */ +#define DENDA 0x0040 /* b6: Dend active select */ +#define PKTM 0x0020 /* b5: Packet mode */ +#define DENDE 0x0010 /* b4: Dend enable */ +#define OBUS 0x0004 /* b2: OUTbus mode */ + +/* CFIFO/DxFIFO Port Select Register */ +#define RCNT 0x8000 /* b15: Read count mode */ +#define REW 0x4000 /* b14: Buffer rewind */ +#define DCLRM 0x2000 /* b13: DMA buffer clear mode */ +#define DREQE 0x1000 /* b12: DREQ output enable */ +#define MBW_8 0x0000 /* 8bit */ +#define MBW_16 0x0400 /* 16bit */ +#define MBW_32 0x0800 /* 32bit */ +#define BIGEND 0x0100 /* b8: Big endian mode */ +#define BYTE_LITTLE 0x0000 /* little dendian */ +#define BYTE_BIG 0x0100 /* big endifan */ +#define ISEL 0x0020 /* b5: DCP FIFO port direction select */ +#define CURPIPE 0x000F /* b2-0: PIPE select */ + +/* CFIFO/DxFIFO Port Control Register */ +#define BVAL 0x8000 /* b15: Buffer valid flag */ +#define BCLR 0x4000 /* b14: Buffer clear */ +#define FRDY 0x2000 /* b13: FIFO ready */ +#define DTLN 0x0FFF /* b11-0: FIFO received data length */ + +/* Interrupt Enable Register 0 */ 
+#define VBSE 0x8000 /* b15: VBUS interrupt */ +#define RSME 0x4000 /* b14: Resume interrupt */ +#define SOFE 0x2000 /* b13: Frame update interrupt */ +#define DVSE 0x1000 /* b12: Device state transition interrupt */ +#define CTRE 0x0800 /* b11: Control transfer stage transition interrupt */ +#define BEMPE 0x0400 /* b10: Buffer empty interrupt */ +#define NRDYE 0x0200 /* b9: Buffer not ready interrupt */ +#define BRDYE 0x0100 /* b8: Buffer ready interrupt */ + +/* Interrupt Enable Register 1 */ +#define OVRCRE 0x8000 /* b15: Over-current interrupt */ +#define BCHGE 0x4000 /* b14: USB us chenge interrupt */ +#define DTCHE 0x1000 /* b12: Detach sense interrupt */ +#define ATTCHE 0x0800 /* b11: Attach sense interrupt */ +#define EOFERRE 0x0040 /* b6: EOF error interrupt */ +#define SIGNE 0x0020 /* b5: SETUP IGNORE interrupt */ +#define SACKE 0x0010 /* b4: SETUP ACK interrupt */ + +/* BRDY Interrupt Enable/Status Register */ +#define BRDY9 0x0200 /* b9: PIPE9 */ +#define BRDY8 0x0100 /* b8: PIPE8 */ +#define BRDY7 0x0080 /* b7: PIPE7 */ +#define BRDY6 0x0040 /* b6: PIPE6 */ +#define BRDY5 0x0020 /* b5: PIPE5 */ +#define BRDY4 0x0010 /* b4: PIPE4 */ +#define BRDY3 0x0008 /* b3: PIPE3 */ +#define BRDY2 0x0004 /* b2: PIPE2 */ +#define BRDY1 0x0002 /* b1: PIPE1 */ +#define BRDY0 0x0001 /* b1: PIPE0 */ + +/* NRDY Interrupt Enable/Status Register */ +#define NRDY9 0x0200 /* b9: PIPE9 */ +#define NRDY8 0x0100 /* b8: PIPE8 */ +#define NRDY7 0x0080 /* b7: PIPE7 */ +#define NRDY6 0x0040 /* b6: PIPE6 */ +#define NRDY5 0x0020 /* b5: PIPE5 */ +#define NRDY4 0x0010 /* b4: PIPE4 */ +#define NRDY3 0x0008 /* b3: PIPE3 */ +#define NRDY2 0x0004 /* b2: PIPE2 */ +#define NRDY1 0x0002 /* b1: PIPE1 */ +#define NRDY0 0x0001 /* b1: PIPE0 */ + +/* BEMP Interrupt Enable/Status Register */ +#define BEMP9 0x0200 /* b9: PIPE9 */ +#define BEMP8 0x0100 /* b8: PIPE8 */ +#define BEMP7 0x0080 /* b7: PIPE7 */ +#define BEMP6 0x0040 /* b6: PIPE6 */ +#define BEMP5 0x0020 /* b5: PIPE5 */ +#define BEMP4 0x0010 /* b4: PIPE4 */ +#define BEMP3 0x0008 /* b3: PIPE3 */ +#define BEMP2 0x0004 /* b2: PIPE2 */ +#define BEMP1 0x0002 /* b1: PIPE1 */ +#define BEMP0 0x0001 /* b0: PIPE0 */ + +/* SOF Pin Configuration Register */ +#define TRNENSEL 0x0100 /* b8: Select transaction enable period */ +#define BRDYM 0x0040 /* b6: BRDY clear timing */ +#define INTL 0x0020 /* b5: Interrupt sense select */ +#define EDGESTS 0x0010 /* b4: */ +#define SOFMODE 0x000C /* b3-2: SOF pin select */ +#define SOF_125US 0x0008 /* SOF OUT 125us Frame Signal */ +#define SOF_1MS 0x0004 /* SOF OUT 1ms Frame Signal */ +#define SOF_DISABLE 0x0000 /* SOF OUT Disable */ + +/* Interrupt Status Register 0 */ +#define VBINT 0x8000 /* b15: VBUS interrupt */ +#define RESM 0x4000 /* b14: Resume interrupt */ +#define SOFR 0x2000 /* b13: SOF frame update interrupt */ +#define DVST 0x1000 /* b12: Device state transition interrupt */ +#define CTRT 0x0800 /* b11: Control transfer stage transition interrupt */ +#define BEMP 0x0400 /* b10: Buffer empty interrupt */ +#define NRDY 0x0200 /* b9: Buffer not ready interrupt */ +#define BRDY 0x0100 /* b8: Buffer ready interrupt */ +#define VBSTS 0x0080 /* b7: VBUS input port */ +#define DVSQ 0x0070 /* b6-4: Device state */ +#define DS_SPD_CNFG 0x0070 /* Suspend Configured */ +#define DS_SPD_ADDR 0x0060 /* Suspend Address */ +#define DS_SPD_DFLT 0x0050 /* Suspend Default */ +#define DS_SPD_POWR 0x0040 /* Suspend Powered */ +#define DS_SUSP 0x0040 /* Suspend */ +#define DS_CNFG 0x0030 /* Configured */ +#define DS_ADDS 0x0020 /* Address */ +#define 
DS_DFLT 0x0010 /* Default */ +#define DS_POWR 0x0000 /* Powered */ +#define DVSQS 0x0030 /* b5-4: Device state */ +#define VALID 0x0008 /* b3: Setup packet detected flag */ +#define CTSQ 0x0007 /* b2-0: Control transfer stage */ +#define CS_SQER 0x0006 /* Sequence error */ +#define CS_WRND 0x0005 /* Control write nodata status stage */ +#define CS_WRSS 0x0004 /* Control write status stage */ +#define CS_WRDS 0x0003 /* Control write data stage */ +#define CS_RDSS 0x0002 /* Control read status stage */ +#define CS_RDDS 0x0001 /* Control read data stage */ +#define CS_IDST 0x0000 /* Idle or setup stage */ + +/* Interrupt Status Register 1 */ +#define OVRCR 0x8000 /* b15: Over-current interrupt */ +#define BCHG 0x4000 /* b14: USB bus chenge interrupt */ +#define DTCH 0x1000 /* b12: Detach sense interrupt */ +#define ATTCH 0x0800 /* b11: Attach sense interrupt */ +#define EOFERR 0x0040 /* b6: EOF-error interrupt */ +#define SIGN 0x0020 /* b5: Setup ignore interrupt */ +#define SACK 0x0010 /* b4: Setup acknowledge interrupt */ + +/* Frame Number Register */ +#define OVRN 0x8000 /* b15: Overrun error */ +#define CRCE 0x4000 /* b14: Received data error */ +#define FRNM 0x07FF /* b10-0: Frame number */ + +/* Micro Frame Number Register */ +#define UFRNM 0x0007 /* b2-0: Micro frame number */ + +/* Default Control Pipe Maxpacket Size Register */ +/* Pipe Maxpacket Size Register */ +#define DEVSEL 0xF000 /* b15-14: Device address select */ +#define MAXP 0x007F /* b6-0: Maxpacket size of default control pipe */ + +/* Default Control Pipe Control Register */ +#define BSTS 0x8000 /* b15: Buffer status */ +#define SUREQ 0x4000 /* b14: Send USB request */ +#define CSCLR 0x2000 /* b13: complete-split status clear */ +#define CSSTS 0x1000 /* b12: complete-split status */ +#define SUREQCLR 0x0800 /* b11: stop setup request */ +#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ +#define SQSET 0x0080 /* b7: Sequence toggle bit set */ +#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ +#define PBUSY 0x0020 /* b5: pipe busy */ +#define PINGE 0x0010 /* b4: ping enable */ +#define CCPL 0x0004 /* b2: Enable control transfer complete */ +#define PID 0x0003 /* b1-0: Response PID */ +#define PID_STALL11 0x0003 /* STALL */ +#define PID_STALL 0x0002 /* STALL */ +#define PID_BUF 0x0001 /* BUF */ +#define PID_NAK 0x0000 /* NAK */ + +/* Pipe Window Select Register */ +#define PIPENM 0x0007 /* b2-0: Pipe select */ + +/* Pipe Configuration Register */ +#define R8A66597_TYP 0xC000 /* b15-14: Transfer type */ +#define R8A66597_ISO 0xC000 /* Isochronous */ +#define R8A66597_INT 0x8000 /* Interrupt */ +#define R8A66597_BULK 0x4000 /* Bulk */ +#define R8A66597_BFRE 0x0400 /* b10: Buffer ready interrupt mode select */ +#define R8A66597_DBLB 0x0200 /* b9: Double buffer mode select */ +#define R8A66597_CNTMD 0x0100 /* b8: Continuous transfer mode select */ +#define R8A66597_SHTNAK 0x0080 /* b7: Transfer end NAK */ +#define R8A66597_DIR 0x0010 /* b4: Transfer direction select */ +#define R8A66597_EPNUM 0x000F /* b3-0: Eendpoint number select */ + +/* Pipe Buffer Configuration Register */ +#define BUFSIZE 0x7C00 /* b14-10: Pipe buffer size */ +#define BUFNMB 0x007F /* b6-0: Pipe buffer number */ +#define PIPE0BUF 256 +#define PIPExBUF 64 + +/* Pipe Maxpacket Size Register */ +#define MXPS 0x07FF /* b10-0: Maxpacket size */ + +/* Pipe Cycle Configuration Register */ +#define IFIS 0x1000 /* b12: Isochronous in-buffer flush mode select */ +#define IITV 0x0007 /* b2-0: Isochronous interval */ + +/* Pipex Control Register */ 
+#define BSTS 0x8000 /* b15: Buffer status */ +#define INBUFM 0x4000 /* b14: IN buffer monitor (Only for PIPE1 to 5) */ +#define CSCLR 0x2000 /* b13: complete-split status clear */ +#define CSSTS 0x1000 /* b12: complete-split status */ +#define ATREPM 0x0400 /* b10: Auto repeat mode */ +#define ACLRM 0x0200 /* b9: Out buffer auto clear mode */ +#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ +#define SQSET 0x0080 /* b7: Sequence toggle bit set */ +#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ +#define PBUSY 0x0020 /* b5: pipe busy */ +#define PID 0x0003 /* b1-0: Response PID */ + +/* PIPExTRE */ +#define TRENB 0x0200 /* b9: Transaction counter enable */ +#define TRCLR 0x0100 /* b8: Transaction counter clear */ + +/* PIPExTRN */ +#define TRNCNT 0xFFFF /* b15-0: Transaction counter */ + +/* DEVADDx */ +#define UPPHUB 0x7800 +#define HUBPORT 0x0700 +#define USBSPD 0x00C0 +#define RTPORT 0x0001 + +#endif /* __LINUX_USB_R8A66597_H */ -- cgit v1.2.3 From 2c59b0b70b9d5d61c726f179724660c4c2423f31 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Jul 2009 14:41:35 +0000 Subject: usb: m66592-udc platform data on_chip support Convert the m66592-udc driver to use the on_chip flag from platform data to enable on chip behaviour instead of relying on CONFIG_SUPERH_BUILT_IN_M66592 ugliness. This makes the code cleaner and also allows us to support both external and internal m66592 with the same kernel. It also makes the Kconfig part more future proof since we with this patch can add support for new processors with on-chip m66592 without modifying the Kconfig. The patch adds a m66592 header file for platform data and ties in platform data to the existing m66592 devices. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-highlander/setup.c | 7 + arch/sh/boards/mach-x3proto/setup.c | 7 + arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 8 +- drivers/usb/gadget/Kconfig | 10 -- drivers/usb/gadget/m66592-udc.c | 252 +++++++++++++++++++-------------- drivers/usb/gadget/m66592-udc.h | 89 ++++++------ include/linux/usb/m66592.h | 44 ++++++ 7 files changed, 257 insertions(+), 160 deletions(-) create mode 100644 include/linux/usb/m66592.h (limited to 'include') diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c index 1639f8915000..566e69d8d729 100644 --- a/arch/sh/boards/mach-highlander/setup.c +++ b/arch/sh/boards/mach-highlander/setup.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,11 @@ static struct platform_device r8a66597_usb_host_device = { .resource = r8a66597_usb_host_resources, }; +static struct m66592_platdata usbf_platdata = { + .xtal = M66592_PLATDATA_XTAL_24MHZ, + .vif = 1, +}; + static struct resource m66592_usb_peripheral_resources[] = { [0] = { .name = "m66592_udc", @@ -81,6 +87,7 @@ static struct platform_device m66592_usb_peripheral_device = { .dev = { .dma_mask = NULL, /* don't use dma */ .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(m66592_usb_peripheral_resources), .resource = m66592_usb_peripheral_resources, diff --git a/arch/sh/boards/mach-x3proto/setup.c b/arch/sh/boards/mach-x3proto/setup.c index 8913ae39a802..efe4cb9f8a77 100644 --- a/arch/sh/boards/mach-x3proto/setup.c +++ b/arch/sh/boards/mach-x3proto/setup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static struct resource heartbeat_resources[] = { @@ -89,6 +90,11 @@ static struct platform_device r8a66597_usb_host_device = { 
.resource = r8a66597_usb_host_resources, }; +static struct m66592_platdata usbf_platdata = { + .xtal = M66592_PLATDATA_XTAL_24MHZ, + .vif = 1, +}; + static struct resource m66592_usb_peripheral_resources[] = { [0] = { .name = "m66592_udc", @@ -109,6 +115,7 @@ static struct platform_device m66592_usb_peripheral_device = { .dev = { .dma_mask = NULL, /* don't use dma */ .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(m66592_usb_peripheral_resources), .resource = m66592_usb_peripheral_resources, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index ea524a2da3e4..0bad14a44238 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -47,9 +48,13 @@ static struct platform_device rtc_device = { .resource = rtc_resources, }; +static struct m66592_platdata usbf_platdata = { + .on_chip = 1, +}; + static struct resource usbf_resources[] = { [0] = { - .name = "m66592_udc", + .name = "USBF", .start = 0x04480000, .end = 0x044800FF, .flags = IORESOURCE_MEM, @@ -67,6 +72,7 @@ static struct platform_device usbf_device = { .dev = { .dma_mask = NULL, .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(usbf_resources), .resource = usbf_resources, diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7f8e83a954ac..b7f10bc25c2c 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -360,16 +360,6 @@ config USB_M66592 default USB_GADGET select USB_GADGET_SELECTED -config SUPERH_BUILT_IN_M66592 - boolean "Enable SuperH built-in USB like the M66592" - depends on USB_GADGET_M66592 && CPU_SUBTYPE_SH7722 - help - SH7722 has USB like the M66592. - - The transfer rate is very slow when use "Ethernet Gadget". - However, this problem is improved if change a value of - NET_IP_ALIGN to 4. 
- # # Controllers available only in discrete form (and all PCI controllers) # diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c index 0dddd2f8ff35..a61c70caff12 100644 --- a/drivers/usb/gadget/m66592-udc.c +++ b/drivers/usb/gadget/m66592-udc.c @@ -31,38 +31,12 @@ #include "m66592-udc.h" - MODULE_DESCRIPTION("M66592 USB gadget driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yoshihiro Shimoda"); MODULE_ALIAS("platform:m66592_udc"); -#define DRIVER_VERSION "26 Jun 2009" - -/* module parameters */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) -static unsigned short endian = M66592_LITTLE; -module_param(endian, ushort, 0644); -MODULE_PARM_DESC(endian, "data endian: big=0, little=0 (default=0)"); -#else -static unsigned short clock = M66592_XTAL24; -module_param(clock, ushort, 0644); -MODULE_PARM_DESC(clock, "input clock: 48MHz=32768, 24MHz=16384, 12MHz=0 " - "(default=16384)"); - -static unsigned short vif = M66592_LDRV; -module_param(vif, ushort, 0644); -MODULE_PARM_DESC(vif, "input VIF: 3.3V=32768, 1.5V=0 (default=32768)"); - -static unsigned short endian; -module_param(endian, ushort, 0644); -MODULE_PARM_DESC(endian, "data endian: big=256, little=0 (default=0)"); - -static unsigned short irq_sense = M66592_INTL; -module_param(irq_sense, ushort, 0644); -MODULE_PARM_DESC(irq_sense, "IRQ sense: low level=2, falling edge=0 " - "(default=2)"); -#endif +#define DRIVER_VERSION "21 July 2009" static const char udc_name[] = "m66592_udc"; static const char *m66592_ep_name[] = { @@ -244,6 +218,7 @@ static inline int get_buffer_size(struct m66592 *m66592, u16 pipenum) static inline void pipe_change(struct m66592 *m66592, u16 pipenum) { struct m66592_ep *ep = m66592->pipenum2ep[pipenum]; + unsigned short mbw; if (ep->use_dma) return; @@ -252,7 +227,12 @@ static inline void pipe_change(struct m66592 *m66592, u16 pipenum) ndelay(450); - m66592_bset(m66592, M66592_MBW, ep->fifosel); + if (m66592->pdata->on_chip) + mbw = M66592_MBW_32; + else + mbw = M66592_MBW_16; + + m66592_bset(m66592, mbw, ep->fifosel); } static int pipe_buffer_setting(struct m66592 *m66592, @@ -332,6 +312,7 @@ static void pipe_buffer_release(struct m66592 *m66592, static void pipe_initialize(struct m66592_ep *ep) { struct m66592 *m66592 = ep->m66592; + unsigned short mbw; m66592_mdfy(m66592, 0, M66592_CURPIPE, ep->fifosel); @@ -343,7 +324,12 @@ static void pipe_initialize(struct m66592_ep *ep) ndelay(450); - m66592_bset(m66592, M66592_MBW, ep->fifosel); + if (m66592->pdata->on_chip) + mbw = M66592_MBW_32; + else + mbw = M66592_MBW_16; + + m66592_bset(m66592, mbw, ep->fifosel); } } @@ -359,15 +345,13 @@ static void m66592_ep_setting(struct m66592 *m66592, struct m66592_ep *ep, ep->fifosel = M66592_D0FIFOSEL; ep->fifoctr = M66592_D0FIFOCTR; ep->fifotrn = M66592_D0FIFOTRN; -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) - } else if (m66592->num_dma == 1) { + } else if (!m66592->pdata->on_chip && m66592->num_dma == 1) { m66592->num_dma++; ep->use_dma = 1; ep->fifoaddr = M66592_D1FIFO; ep->fifosel = M66592_D1FIFOSEL; ep->fifoctr = M66592_D1FIFOCTR; ep->fifotrn = M66592_D1FIFOTRN; -#endif } else { ep->use_dma = 0; ep->fifoaddr = M66592_CFIFO; @@ -612,76 +596,120 @@ static void start_ep0(struct m66592_ep *ep, struct m66592_request *req) } } -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) static void init_controller(struct m66592 *m66592) { - m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ - m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); - 
m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); + unsigned int endian; - /* This is a workaound for SH7722 2nd cut */ - m66592_bset(m66592, 0x8000, M66592_DVSTCTR); - m66592_bset(m66592, 0x1000, M66592_TESTMODE); - m66592_bclr(m66592, 0x8000, M66592_DVSTCTR); + if (m66592->pdata->on_chip) { + if (m66592->pdata->endian) + endian = 0; /* big endian */ + else + endian = M66592_LITTLE; /* little endian */ - m66592_bset(m66592, M66592_INTL, M66592_INTENB1); + m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ + m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); + m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); - m66592_write(m66592, 0, M66592_CFBCFG); - m66592_write(m66592, 0, M66592_D0FBCFG); - m66592_bset(m66592, endian, M66592_CFBCFG); - m66592_bset(m66592, endian, M66592_D0FBCFG); -} -#else /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ -static void init_controller(struct m66592 *m66592) -{ - m66592_bset(m66592, (vif & M66592_LDRV) | (endian & M66592_BIGEND), - M66592_PINCFG); - m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ - m66592_mdfy(m66592, clock & M66592_XTAL, M66592_XTAL, M66592_SYSCFG); + /* This is a workaound for SH7722 2nd cut */ + m66592_bset(m66592, 0x8000, M66592_DVSTCTR); + m66592_bset(m66592, 0x1000, M66592_TESTMODE); + m66592_bclr(m66592, 0x8000, M66592_DVSTCTR); - m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); - m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bset(m66592, M66592_INTL, M66592_INTENB1); + + m66592_write(m66592, 0, M66592_CFBCFG); + m66592_write(m66592, 0, M66592_D0FBCFG); + m66592_bset(m66592, endian, M66592_CFBCFG); + m66592_bset(m66592, endian, M66592_D0FBCFG); + } else { + unsigned int clock, vif, irq_sense; + + if (m66592->pdata->endian) + endian = M66592_BIGEND; /* big endian */ + else + endian = 0; /* little endian */ + + if (m66592->pdata->vif) + vif = M66592_LDRV; /* 3.3v */ + else + vif = 0; /* 1.5v */ + + switch (m66592->pdata->xtal) { + case M66592_PLATDATA_XTAL_12MHZ: + clock = M66592_XTAL12; + break; + case M66592_PLATDATA_XTAL_24MHZ: + clock = M66592_XTAL24; + break; + case M66592_PLATDATA_XTAL_48MHZ: + clock = M66592_XTAL48; + break; + default: + pr_warning("m66592-udc: xtal configuration error\n"); + clock = 0; + } - m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + switch (m66592->irq_trigger) { + case IRQF_TRIGGER_LOW: + irq_sense = M66592_INTL; + break; + case IRQF_TRIGGER_FALLING: + irq_sense = 0; + break; + default: + pr_warning("m66592-udc: irq trigger config error\n"); + irq_sense = 0; + } - msleep(3); + m66592_bset(m66592, + (vif & M66592_LDRV) | (endian & M66592_BIGEND), + M66592_PINCFG); + m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ + m66592_mdfy(m66592, clock & M66592_XTAL, M66592_XTAL, + M66592_SYSCFG); + m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); + m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bset(m66592, M66592_RCKE | M66592_PLLC, M66592_SYSCFG); + m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + + msleep(3); - msleep(1); + m66592_bset(m66592, M66592_RCKE | M66592_PLLC, M66592_SYSCFG); - m66592_bset(m66592, M66592_SCKE, M66592_SYSCFG); + msleep(1); - m66592_bset(m66592, irq_sense & M66592_INTL, M66592_INTENB1); - m66592_write(m66592, M66592_BURST | M66592_CPU_ADR_RD_WR, - M66592_DMA0CFG); + m66592_bset(m66592, M66592_SCKE, M66592_SYSCFG); + + m66592_bset(m66592, irq_sense & M66592_INTL, 
M66592_INTENB1); + m66592_write(m66592, M66592_BURST | M66592_CPU_ADR_RD_WR, + M66592_DMA0CFG); + } } -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ static void disable_controller(struct m66592 *m66592) { -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) - m66592_bclr(m66592, M66592_SCKE, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_PLLC, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_RCKE, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_XCKE, M66592_SYSCFG); -#endif + if (!m66592->pdata->on_chip) { + m66592_bclr(m66592, M66592_SCKE, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_PLLC, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_RCKE, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_XCKE, M66592_SYSCFG); + } } static void m66592_start_xclock(struct m66592 *m66592) { -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) u16 tmp; - tmp = m66592_read(m66592, M66592_SYSCFG); - if (!(tmp & M66592_XCKE)) - m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); -#endif + if (!m66592->pdata->on_chip) { + tmp = m66592_read(m66592, M66592_SYSCFG); + if (!(tmp & M66592_XCKE)) + m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + } } /*-------------------------------------------------------------------------*/ @@ -1169,8 +1197,7 @@ static irqreturn_t m66592_irq(int irq, void *_m66592) intsts0 = m66592_read(m66592, M66592_INTSTS0); intenb0 = m66592_read(m66592, M66592_INTENB0); -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - if (!intsts0 && !intenb0) { + if (m66592->pdata->on_chip && !intsts0 && !intenb0) { /* * When USB clock stops, it cannot read register. Even if a * clock stops, the interrupt occurs. So this driver turn on @@ -1180,7 +1207,6 @@ static irqreturn_t m66592_irq(int irq, void *_m66592) intsts0 = m66592_read(m66592, M66592_INTSTS0); intenb0 = m66592_read(m66592, M66592_INTENB0); } -#endif savepipe = m66592_read(m66592, M66592_CFIFOSEL); @@ -1526,9 +1552,11 @@ static int __exit m66592_remove(struct platform_device *pdev) iounmap(m66592->reg); free_irq(platform_get_irq(pdev, 0), m66592); m66592_free_request(&m66592->ep[0].ep, m66592->ep0_req); -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - clk_disable(m66592->clk); - clk_put(m66592->clk); +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + clk_disable(m66592->clk); + clk_put(m66592->clk); + } #endif kfree(m66592); return 0; @@ -1540,11 +1568,10 @@ static void nop_completion(struct usb_ep *ep, struct usb_request *r) static int __init m66592_probe(struct platform_device *pdev) { - struct resource *res; - int irq; + struct resource *res, *ires; void __iomem *reg = NULL; struct m66592 *m66592 = NULL; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK char clk_name[8]; #endif int ret = 0; @@ -1557,10 +1584,11 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } - irq = platform_get_irq(pdev, 0); - if (irq < 0) { + ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!ires) { ret = -ENODEV; - pr_err("platform_get_irq error.\n"); + dev_err(&pdev->dev, + "platform_get_resource IORESOURCE_IRQ error.\n"); goto clean_up; } @@ -1571,6 +1599,12 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } + if (pdev->dev.platform_data == NULL) { + dev_err(&pdev->dev, "no platform data\n"); + ret = -ENODEV; + goto clean_up; + } + /* initialize ucd */ m66592 = kzalloc(sizeof(struct m66592), GFP_KERNEL); if (m66592 == NULL) { @@ -1578,6 +1612,9 @@ static int __init 
m66592_probe(struct platform_device *pdev) goto clean_up; } + m66592->pdata = pdev->dev.platform_data; + m66592->irq_trigger = ires->flags & IRQF_TRIGGER_MASK; + spin_lock_init(&m66592->lock); dev_set_drvdata(&pdev->dev, m66592); @@ -1595,22 +1632,25 @@ static int __init m66592_probe(struct platform_device *pdev) m66592->timer.data = (unsigned long)m66592; m66592->reg = reg; - ret = request_irq(irq, m66592_irq, IRQF_DISABLED | IRQF_SHARED, + ret = request_irq(ires->start, m66592_irq, IRQF_DISABLED | IRQF_SHARED, udc_name, m66592); if (ret < 0) { pr_err("request_irq error (%d)\n", ret); goto clean_up; } -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - snprintf(clk_name, sizeof(clk_name), "usbf%d", pdev->id); - m66592->clk = clk_get(&pdev->dev, clk_name); - if (IS_ERR(m66592->clk)) { - dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); - ret = PTR_ERR(m66592->clk); - goto clean_up2; +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + snprintf(clk_name, sizeof(clk_name), "usbf%d", pdev->id); + m66592->clk = clk_get(&pdev->dev, clk_name); + if (IS_ERR(m66592->clk)) { + dev_err(&pdev->dev, "cannot get clock \"%s\"\n", + clk_name); + ret = PTR_ERR(m66592->clk); + goto clean_up2; + } + clk_enable(m66592->clk); } - clk_enable(m66592->clk); #endif INIT_LIST_HEAD(&m66592->gadget.ep_list); m66592->gadget.ep0 = &m66592->ep[0].ep; @@ -1652,12 +1692,14 @@ static int __init m66592_probe(struct platform_device *pdev) return 0; clean_up3: -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - clk_disable(m66592->clk); - clk_put(m66592->clk); +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + clk_disable(m66592->clk); + clk_put(m66592->clk); + } clean_up2: #endif - free_irq(irq, m66592); + free_irq(ires->start, m66592); clean_up: if (m66592) { if (m66592->ep0_req) diff --git a/drivers/usb/gadget/m66592-udc.h b/drivers/usb/gadget/m66592-udc.h index 9a9c2bf9fbd5..8b960deed680 100644 --- a/drivers/usb/gadget/m66592-udc.h +++ b/drivers/usb/gadget/m66592-udc.h @@ -23,10 +23,12 @@ #ifndef __M66592_UDC_H__ #define __M66592_UDC_H__ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK #include #endif +#include + #define M66592_SYSCFG 0x00 #define M66592_XTAL 0xC000 /* b15-14: Crystal selection */ #define M66592_XTAL48 0x8000 /* 48MHz */ @@ -76,11 +78,11 @@ #define M66592_P_TST_J 0x0001 /* PERI TEST J */ #define M66592_P_TST_NORMAL 0x0000 /* PERI Normal Mode */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) +/* built-in registers */ #define M66592_CFBCFG 0x0A #define M66592_D0FBCFG 0x0C #define M66592_LITTLE 0x0100 /* b8: Little endian mode */ -#else +/* external chip case */ #define M66592_PINCFG 0x0A #define M66592_LDRV 0x8000 /* b15: Drive Current Adjust */ #define M66592_BIGEND 0x0100 /* b8: Big endian mode */ @@ -100,8 +102,8 @@ #define M66592_PKTM 0x0020 /* b5: Packet mode */ #define M66592_DENDE 0x0010 /* b4: Dend enable */ #define M66592_OBUS 0x0004 /* b2: OUTbus mode */ -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ +/* common case */ #define M66592_CFIFO 0x10 #define M66592_D0FIFO 0x14 #define M66592_D1FIFO 0x18 @@ -113,13 +115,9 @@ #define M66592_REW 0x4000 /* b14: Buffer rewind */ #define M66592_DCLRM 0x2000 /* b13: DMA buffer clear mode */ #define M66592_DREQE 0x1000 /* b12: DREQ output enable */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) -#define M66592_MBW 0x0800 /* b11: Maximum bit width for FIFO */ -#else -#define M66592_MBW 0x0400 /* b10: Maximum bit width for FIFO */ -#define 
M66592_MBW_8 0x0000 /* 8bit */ -#define M66592_MBW_16 0x0400 /* 16bit */ -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ +#define M66592_MBW_8 0x0000 /* 8bit */ +#define M66592_MBW_16 0x0400 /* 16bit */ +#define M66592_MBW_32 0x0800 /* 32bit */ #define M66592_TRENB 0x0200 /* b9: Transaction counter enable */ #define M66592_TRCLR 0x0100 /* b8: Transaction counter clear */ #define M66592_DEZPM 0x0080 /* b7: Zero-length packet mode */ @@ -480,9 +478,11 @@ struct m66592_ep { struct m66592 { spinlock_t lock; void __iomem *reg; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK struct clk *clk; #endif + struct m66592_platdata *pdata; + unsigned long irq_trigger; struct usb_gadget gadget; struct usb_gadget_driver *driver; @@ -546,13 +546,13 @@ static inline void m66592_read_fifo(struct m66592 *m66592, { unsigned long fifoaddr = (unsigned long)m66592->reg + offset; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - len = (len + 3) / 4; - insl(fifoaddr, buf, len); -#else - len = (len + 1) / 2; - insw(fifoaddr, buf, len); -#endif + if (m66592->pdata->on_chip) { + len = (len + 3) / 4; + insl(fifoaddr, buf, len); + } else { + len = (len + 1) / 2; + insw(fifoaddr, buf, len); + } } static inline void m66592_write(struct m66592 *m66592, u16 val, @@ -566,33 +566,34 @@ static inline void m66592_write_fifo(struct m66592 *m66592, void *buf, unsigned long len) { unsigned long fifoaddr = (unsigned long)m66592->reg + offset; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - unsigned long count; - unsigned char *pb; - int i; - - count = len / 4; - outsl(fifoaddr, buf, count); - - if (len & 0x00000003) { - pb = buf + count * 4; - for (i = 0; i < (len & 0x00000003); i++) { - if (m66592_read(m66592, M66592_CFBCFG)) /* little */ - outb(pb[i], fifoaddr + (3 - i)); - else - outb(pb[i], fifoaddr + i); + + if (m66592->pdata->on_chip) { + unsigned long count; + unsigned char *pb; + int i; + + count = len / 4; + outsl(fifoaddr, buf, count); + + if (len & 0x00000003) { + pb = buf + count * 4; + for (i = 0; i < (len & 0x00000003); i++) { + if (m66592_read(m66592, M66592_CFBCFG)) /* le */ + outb(pb[i], fifoaddr + (3 - i)); + else + outb(pb[i], fifoaddr + i); + } + } + } else { + unsigned long odd = len & 0x0001; + + len = len / 2; + outsw(fifoaddr, buf, len); + if (odd) { + unsigned char *p = buf + len*2; + outb(*p, fifoaddr); } } -#else - unsigned long odd = len & 0x0001; - - len = len / 2; - outsw(fifoaddr, buf, len); - if (odd) { - unsigned char *p = buf + len*2; - outb(*p, fifoaddr); - } -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ } static inline void m66592_mdfy(struct m66592 *m66592, u16 val, u16 pat, diff --git a/include/linux/usb/m66592.h b/include/linux/usb/m66592.h new file mode 100644 index 000000000000..cda9625e7df0 --- /dev/null +++ b/include/linux/usb/m66592.h @@ -0,0 +1,44 @@ +/* + * M66592 driver platform data + * + * Copyright (C) 2009 Renesas Solutions Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __LINUX_USB_M66592_H +#define __LINUX_USB_M66592_H + +#define M66592_PLATDATA_XTAL_12MHZ 0x01 +#define M66592_PLATDATA_XTAL_24MHZ 0x02 +#define M66592_PLATDATA_XTAL_48MHZ 0x03 + +struct m66592_platdata { + /* one = on chip controller, zero = external controller */ + unsigned on_chip:1; + + /* one = big endian, zero = little endian */ + unsigned endian:1; + + /* (external controller only) M66592_PLATDATA_XTAL_nnMHZ */ + unsigned xtal:2; + + /* (external controller only) one = 3.3V, zero = 1.5V */ + unsigned vif:1; + +}; + +#endif /* __LINUX_USB_M66592_H */ + -- cgit v1.2.3 From 0c193054a4c1cf190d2f23e5e91bd14402e43912 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 27 Jul 2009 19:09:19 -0400 Subject: nfsd41: change from page to memory based drc limits NFSD_SLOT_CACHE_SIZE is the size of all encoded operation responses (excluding the sequence operation) that we want to cache. For now, keep NFSD_SLOT_CACHE_SIZE at PAGE_SIZE. It will be reduced when the DRC is changed from page based to memory based. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 28 +++++++++++++--------------- fs/nfsd/nfssvc.c | 13 ++++++------- include/linux/nfsd/nfsd.h | 4 ++-- include/linux/nfsd/state.h | 1 + 4 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 70cba3fbfa6d..e2b11b1b515c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -414,31 +414,31 @@ gen_sessionid(struct nfsd4_session *ses) /* * Give the client the number of slots it requests bound by - * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. + * NFSD_MAX_SLOTS_PER_SESSION and by nfsd_drc_max_mem. * - * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we - * should (up to a point) re-negotiate active sessions and reduce their - * slot usage to make rooom for new connections. For now we just fail the - * create session. + * If we run out of reserved DRC memory we should (up to a point) re-negotiate + * active sessions and reduce their slot usage to make rooom for new + * connections. For now we just fail the create session.
*/ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) { - int np; + int mem; if (fchan->maxreqs < 1) return nfserr_inval; else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; + mem = fchan->maxreqs * NFSD_SLOT_CACHE_SIZE; spin_lock(&nfsd_drc_lock); - if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) - np = nfsd_drc_max_pages - nfsd_drc_pages_used; - nfsd_drc_pages_used += np; + if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) + mem = ((nfsd_drc_max_mem - nfsd_drc_mem_used) / + NFSD_SLOT_CACHE_SIZE) * NFSD_SLOT_CACHE_SIZE; + nfsd_drc_mem_used += mem; spin_unlock(&nfsd_drc_lock); - fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; + fchan->maxreqs = mem / NFSD_SLOT_CACHE_SIZE; if (fchan->maxreqs == 0) return nfserr_resource; return 0; @@ -465,9 +465,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, fchan->maxresp_sz = maxcount; session_fchan->maxresp_sz = fchan->maxresp_sz; - /* Set the max response cached size our default which is - * a multiple of PAGE_SIZE and small */ - session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; + session_fchan->maxresp_cached = NFSD_SLOT_CACHE_SIZE; fchan->maxresp_cached = session_fchan->maxresp_cached; /* Use the client's maxops if possible */ @@ -585,7 +583,7 @@ free_session(struct kref *kref) nfsd4_release_respages(e->ce_respages, e->ce_resused); } spin_lock(&nfsd_drc_lock); - nfsd_drc_pages_used -= ses->se_fchannel.maxreqs * NFSD_PAGES_PER_SLOT; + nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; spin_unlock(&nfsd_drc_lock); kfree(ses); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9be2a1932f8a..5a280a9cb540 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -74,8 +74,8 @@ struct svc_serv *nfsd_serv; * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
*/ spinlock_t nfsd_drc_lock; -unsigned int nfsd_drc_max_pages; -unsigned int nfsd_drc_pages_used; +unsigned int nfsd_drc_max_mem; +unsigned int nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; @@ -247,12 +247,11 @@ void nfsd_reset_versions(void) static void set_max_drc(void) { #define NFSD_DRC_SIZE_SHIFT 10 - nfsd_drc_max_pages = nr_free_buffer_pages() - >> NFSD_DRC_SIZE_SHIFT; - nfsd_drc_pages_used = 0; + nfsd_drc_max_mem = (nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; + nfsd_drc_mem_used = 0; spin_lock_init(&nfsd_drc_lock); - dprintk("%s nfsd_drc_max_pages %u\n", __func__, - nfsd_drc_max_pages); + dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2571f856908f..2812ed52669d 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -57,8 +57,8 @@ extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; extern spinlock_t nfsd_drc_lock; -extern unsigned int nfsd_drc_max_pages; -extern unsigned int nfsd_drc_pages_used; +extern unsigned int nfsd_drc_max_mem; +extern unsigned int nfsd_drc_mem_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 57ab2ed08459..a6c87d623891 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -96,6 +96,7 @@ struct nfs4_cb_conn { #define NFSD_MAX_SLOTS_PER_SESSION 128 /* Maximum number of pages per slot cache entry */ #define NFSD_PAGES_PER_SLOT 1 +#define NFSD_SLOT_CACHE_SIZE PAGE_SIZE /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 -- cgit v1.2.3 From 49557cc74c7bdf6a984be227ead9a84b3a26f053 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 23 Jul 2009 19:02:16 -0400 Subject: nfsd41: Use separate DRC for setclientid Instead of trying to share the generic 4.1 reply cache code for the CREATE_SESSION reply cache, it's simpler to handle CREATE_SESSION separately. The nfs41 single slot clientid DRC holds the results of create session processing. CREATE_SESSION can be preceded by a SEQUENCE operation (an embedded CREATE_SESSION) and the create session single slot cache must be maintained. nfsd4_replay_cache_entry() and nfsd4_store_cache_entry() do not implement the replay of an embedded CREATE_SESSION. The clientid DRC slot does not need the inuse, cachethis or other fields that the multiple slot session cache uses. Replace the clientid DRC cache struct nfs4_slot cache with a new nfsd4_clid_slot cache. Save the xdr struct nfsd4_create_session into the cache at the end of processing, and on a replay, replace the struct for the replay request with the cached version all while under the state lock. nfsd4_proc_compound will handle both the solo and embedded CREATE_SESSION case via the normal use of encode_operation. Errors that do not change the create session cache: A create session NFS4ERR_STALE_CLIENTID error means that a client record (and associated create session slot) could not be found and therefore can't be changed. NFSERR_SEQ_MISORDERED errors do not change the slot cache. All other errors get cached. Remove the clientid DRC specific check in nfs4svc_encode_compoundres to put the session only if cstate.session is set, which will now always be true. Signed-off-by: Andy Adamson Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++------------------- fs/nfsd/nfs4xdr.c | 3 +-- include/linux/nfsd/state.h | 21 ++++++++++++++- include/linux/nfsd/xdr4.h | 12 --------- 5 files changed, 60 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d781658e8084..d606c6a427de 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1120,7 +1120,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, BUG_ON(op->status == nfs_ok); encode_op: - /* Only from SEQUENCE or CREATE_SESSION */ + /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); if (nfsd4_not_cached(resp)) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 99df8e7a687b..7729d092c8a5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -653,8 +653,6 @@ static inline void free_client(struct nfs4_client *clp) { shutdown_callback_client(clp); - nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, - clp->cl_slot.sl_cache_entry.ce_resused); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -1293,12 +1291,11 @@ out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - new->cl_slot.sl_seqid = 0; exid->seqid = 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_slot.sl_seqid, new->cl_exchange_flags); + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); status = nfs_ok; out: @@ -1334,15 +1331,35 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) return nfserr_seq_misordered; } +/* + * Cache the create session result into the create session single DRC + * slot cache by saving the xdr structure. sl_seqid has been set. + * Do this for solo or embedded create session operations. + */ +static void +nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot, int nfserr) +{ + slot->sl_status = nfserr; + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); +} + +static __be32 +nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot) +{ + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); + return slot->sl_status; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; - struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *conf, *unconf; - struct nfsd4_slot *slot = NULL; + struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; nfs4_lock_state(); @@ -1350,25 +1367,22 @@ nfsd4_create_session(struct svc_rqst *rqstp, conf = find_confirmed_client(&cr_ses->clientid); if (conf) { - slot = &conf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &conf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { dprintk("Got a create_session replay! 
seqid= %d\n", - slot->sl_seqid); - cstate->slot = slot; - cstate->status = status; + cs_slot->sl_seqid); /* Return the cached reply status */ - status = nfsd4_replay_cache_entry(resp, NULL); + status = nfsd4_replay_create_session(cr_ses, cs_slot); goto out; - } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { status = nfserr_seq_misordered; dprintk("Sequence misordered!\n"); dprintk("Expected seqid= %d but got seqid= %d\n", - slot->sl_seqid, cr_ses->seqid); + cs_slot->sl_seqid, cr_ses->seqid); goto out; } - conf->cl_slot.sl_seqid++; + cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || (ip_addr != unconf->cl_addr)) { @@ -1376,16 +1390,15 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } - slot = &unconf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &unconf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out; + goto out_cache; } - slot->sl_seqid++; /* from 0 to 1 */ + cs_slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); /* @@ -1406,12 +1419,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - cr_ses->seqid = slot->sl_seqid; + cr_ses->seqid = cs_slot->sl_seqid; - slot->sl_inuse = true; - cstate->slot = slot; - /* Ensure a page is used for the cache */ - slot->sl_cache_entry.ce_cachethis = 1; +out_cache: + /* cache solo and embedded create sessions under the state lock */ + nfsd4_cache_create_session(cr_ses, cs_slot, status); out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2dcc7feaa6ff..fdf632bf1cfe 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3313,8 +3313,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); resp->cstate.slot->sl_inuse = 0; } - if (resp->cstate.session) - nfsd4_put_session(resp->cstate.session); + nfsd4_put_session(resp->cstate.session); } return 1; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a6c87d623891..58bb19784e12 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -127,6 +127,25 @@ struct nfsd4_channel_attrs { u32 rdma_attrs; }; +struct nfsd4_create_session { + clientid_t clientid; + struct nfs4_sessionid sessionid; + u32 seqid; + u32 flags; + struct nfsd4_channel_attrs fore_channel; + struct nfsd4_channel_attrs back_channel; + u32 callback_prog; + u32 uid; + u32 gid; +}; + +/* The single slot clientid cache structure */ +struct nfsd4_clid_slot { + u32 sl_seqid; + __be32 sl_status; + struct nfsd4_create_session sl_cr_ses; +}; + struct nfsd4_session { struct kref se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -193,7 +212,7 @@ struct nfs4_client { /* for nfs41 */ struct list_head cl_sessions; - struct nfsd4_slot cl_slot; /* create_session slot */ + struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; struct nfs4_sessionid cl_sessionid; }; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 2bacf7535069..5e4beb0deb80 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -366,18 +366,6 @@ struct nfsd4_exchange_id { int spa_how; }; -struct nfsd4_create_session 
{ - clientid_t clientid; - struct nfs4_sessionid sessionid; - u32 seqid; - u32 flags; - struct nfsd4_channel_attrs fore_channel; - struct nfsd4_channel_attrs back_channel; - u32 callback_prog; - u32 uid; - u32 gid; -}; - struct nfsd4_sequence { struct nfs4_sessionid sessionid; /* request/response */ u32 seqid; /* request/response */ -- cgit v1.2.3 From e5f5ccb646bc6009572b5c23201b5e81638ff150 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 23 Jul 2009 20:35:53 +0200 Subject: power_supply: get_by_name and set_charged functionality This adds a function that indicates that a battery is fully charged. It also includes functions to get a power_supply device from the class of registered devices by name reference. These can be used to find a specific battery to call power_supply_set_battery_charged() on. Some battery drivers might need this information to calibrate themselves. Signed-off-by: Daniel Mack Cc: Ian Molton Cc: Anton Vorontsov Cc: Matt Reimer Signed-off-by: Anton Vorontsov --- drivers/power/power_supply_core.c | 28 ++++++++++++++++++++++++++++ include/linux/power_supply.h | 3 +++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 12cd6e36ff1d..cce75b40b435 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -116,6 +116,34 @@ int power_supply_is_system_supplied(void) } EXPORT_SYMBOL_GPL(power_supply_is_system_supplied); +int power_supply_set_battery_charged(struct power_supply *psy) +{ + if (psy->type == POWER_SUPPLY_TYPE_BATTERY && psy->set_charged) { + psy->set_charged(psy); + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(power_supply_set_battery_charged); + +static int power_supply_match_device_by_name(struct device *dev, void *data) +{ + const char *name = data; + struct power_supply *psy = dev_get_drvdata(dev); + + return strcmp(psy->name, name) == 0; +} + +struct power_supply *power_supply_get_by_name(char *name) +{ + struct device *dev = class_find_device(power_supply_class, NULL, name, + power_supply_match_device_by_name); + + return dev ? dev_get_drvdata(dev) : NULL; +} +EXPORT_SYMBOL_GPL(power_supply_get_by_name); + int power_supply_register(struct device *parent, struct power_supply *psy) { int rc = 0; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4c7c6fc35487..b5d096d3a9be 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -144,6 +144,7 @@ struct power_supply { enum power_supply_property psp, union power_supply_propval *val); void (*external_power_changed)(struct power_supply *psy); + void (*set_charged)(struct power_supply *psy); /* For APM emulation, think legacy userspace. */ int use_for_apm; @@ -183,8 +184,10 @@ struct power_supply_info { int use_for_apm; }; +extern struct power_supply *power_supply_get_by_name(char *name); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); +extern int power_supply_set_battery_charged(struct power_supply *psy); #if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE) extern int power_supply_is_system_supplied(void); -- cgit v1.2.3 From ff663cf8705bea101d5f73cf471855c85242575e Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 23 Jul 2009 17:25:49 +0100 Subject: agp: Add generic support for graphics dma remapping New driver hooks to support graphics memory dma remapping are introduced in this patch.
It lets generic code tell whether the current device needs dma remapping, and then call the driver-provided interfaces for mapping and unmapping. A change has also been made to handle the scratch_page in the remapping case. Signed-off-by: Zhenyu Wang Signed-off-by: David Woodhouse --- drivers/char/agp/agp.h | 6 ++++++ drivers/char/agp/backend.c | 20 ++++++++++++++++++++ drivers/char/agp/generic.c | 9 +++++++++ include/linux/agp_backend.h | 6 +++++- 4 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index ce110a3bf298..17e6d0d3ba36 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -121,6 +121,11 @@ struct agp_bridge_driver { void (*agp_destroy_pages)(struct agp_memory *); int (*agp_type_to_mask_type) (struct agp_bridge_data *, int); void (*chipset_flush)(struct agp_bridge_data *); + + int (*agp_map_page)(void *addr, dma_addr_t *ret); + void (*agp_unmap_page)(void *addr, dma_addr_t dma); + int (*agp_map_memory)(struct agp_memory *mem); + void (*agp_unmap_memory)(struct agp_memory *mem); }; struct agp_bridge_data { @@ -135,6 +140,7 @@ struct agp_bridge_data { u32 *gatt_table_real; unsigned long scratch_page; unsigned long scratch_page_real; + dma_addr_t scratch_page_dma; unsigned long gart_bus_addr; unsigned long gatt_bus_addr; u32 mode; diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c index 3bd7e503de41..19ac3663acdc 100644 --- a/drivers/char/agp/backend.c +++ b/drivers/char/agp/backend.c @@ -152,6 +152,15 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) bridge->scratch_page_real = phys_to_gart(page_to_phys(page)); bridge->scratch_page = bridge->driver->mask_memory(bridge, phys_to_gart(page_to_phys(page)), 0); + + if (bridge->driver->agp_map_page && + bridge->driver->agp_map_page(phys_to_virt(page_to_phys(page)), + &bridge->scratch_page_dma)) { + dev_err(&bridge->dev->dev, + "unable to dma-map scratch page\n"); + rc = -ENOMEM; + goto err_out_nounmap; + } } size_value = bridge->driver->fetch_size(); @@ -191,6 +200,13 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) return 0; err_out: + if (bridge->driver->needs_scratch_page && + bridge->driver->agp_unmap_page) { + void *va = gart_to_virt(bridge->scratch_page_real); + + bridge->driver->agp_unmap_page(va, bridge->scratch_page_dma); + } +err_out_nounmap: if (bridge->driver->needs_scratch_page) { void *va = gart_to_virt(bridge->scratch_page_real); @@ -221,6 +237,10 @@ static void agp_backend_cleanup(struct agp_bridge_data *bridge) bridge->driver->needs_scratch_page) { void *va = gart_to_virt(bridge->scratch_page_real); + if (bridge->driver->agp_unmap_page) + bridge->driver->agp_unmap_page(va, + bridge->scratch_page_dma); + bridge->driver->agp_destroy_page(va, AGP_PAGE_DESTROY_UNMAP); bridge->driver->agp_destroy_page(va, AGP_PAGE_DESTROY_FREE); } diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index a3bcc7ef42f9..28f0208c66a6 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -437,6 +437,12 @@ int agp_bind_memory(struct agp_memory *curr, off_t pg_start) curr->bridge->driver->cache_flush(); curr->is_flushed = true; } + + if (curr->bridge->driver->agp_map_memory) { + ret_val = curr->bridge->driver->agp_map_memory(curr); + if (ret_val) + return ret_val; + } ret_val = curr->bridge->driver->insert_memory(curr, pg_start, curr->type); if (ret_val != 0) @@ -478,6 +484,9 @@ int agp_unbind_memory(struct agp_memory *curr) if (ret_val != 0) return ret_val; + if
(curr->bridge->driver->agp_unmap_memory) + curr->bridge->driver->agp_unmap_memory(curr); + curr->is_bound = false; curr->pg_start = 0; spin_lock(&curr->bridge->mapped_lock); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 76fa794fdac0..8a294d65b9b1 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -79,9 +79,13 @@ struct agp_memory { u32 physical; bool is_bound; bool is_flushed; - bool vmalloc_flag; + bool vmalloc_flag; + bool sg_vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; + /* DMA-mapped addresses */ + struct scatterlist *sg_list; + int num_sg; }; #define AGP_NORMAL_MEMORY 0 -- cgit v1.2.3 From f692775d7e0a22477143cd884e45c955448ac7d2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 29 Jul 2009 09:28:45 +0100 Subject: intel-agp: fix sglist allocation to avoid vmalloc() Signed-off-by: David Woodhouse --- drivers/char/agp/intel-agp.c | 29 ++++++++++-------------------- include/linux/agp_backend.h | 1 - 2 files changed, 10 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index b9d9886ff3c3..d8c80d8be5e2 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -198,39 +198,30 @@ static void intel_agp_unmap_page(struct page *page, dma_addr_t dma) static void intel_agp_free_sglist(struct agp_memory *mem) { + struct sg_table st; + + st.sgl = mem->sg_list; + st.orig_nents = st.nents = mem->page_count; + + sg_free_table(&st); - if (mem->sg_vmalloc_flag) - vfree(mem->sg_list); - else - kfree(mem->sg_list); - mem->sg_vmalloc_flag = 0; mem->sg_list = NULL; mem->num_sg = 0; } static int intel_agp_map_memory(struct agp_memory *mem) { + struct sg_table st; struct scatterlist *sg; int i; DBG("try mapping %lu pages\n", (unsigned long)mem->page_count); - if ((mem->page_count * sizeof(*mem->sg_list)) < 2*PAGE_SIZE) - mem->sg_list = kcalloc(mem->page_count, sizeof(*mem->sg_list), - GFP_KERNEL); - - if (mem->sg_list == NULL) { - mem->sg_list = vmalloc(mem->page_count * sizeof(*mem->sg_list)); - mem->sg_vmalloc_flag = 1; - } - - if (!mem->sg_list) { - mem->sg_vmalloc_flag = 0; + if (sg_alloc_table(&st, mem->page_count, GFP_KERNEL)) return -ENOMEM; - } - sg_init_table(mem->sg_list, mem->page_count); - sg = mem->sg_list; + mem->sg_list = sg = st.sgl; + for (i = 0 ; i < mem->page_count; i++, sg = sg_next(sg)) sg_set_page(sg, mem->pages[i], PAGE_SIZE, 0); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 8a294d65b9b1..880130f7311f 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -80,7 +80,6 @@ struct agp_memory { bool is_bound; bool is_flushed; bool vmalloc_flag; - bool sg_vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; /* DMA-mapped addresses */ -- cgit v1.2.3 From 42c4ab41a176ee784c0f28c0b29025a8fc34f05a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:26 +0200 Subject: itimers: Merge ITIMER_VIRT and ITIMER_PROF Both CPU itimers have the same data flow in a few places; this patch unifies the code related to the VIRT and PROF itimers.
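For illustration only, here is a minimal userspace sketch of the refactoring pattern this patch applies (the demo_* names are hypothetical and not kernel code): the two parallel prof/virt field pairs collapse into one array indexed by a clock id, so a single helper can replace two nearly identical branches.

#include <stdio.h>

/* mirrors CPUCLOCK_PROF = 0 and CPUCLOCK_VIRT = 1 used as array indexes */
enum { DEMO_CLOCK_PROF = 0, DEMO_CLOCK_VIRT = 1 };

struct demo_itimer {
	unsigned long expires;	/* next expiry, in ticks */
	unsigned long incr;	/* reload interval, in ticks */
};

/* one array replaces the it_prof_expires/incr and it_virt_expires/incr pairs */
static struct demo_itimer demo_it[2];

/* a single helper serves both timers instead of two copied switch arms */
static void demo_set(int clock_id, unsigned long value, unsigned long interval)
{
	demo_it[clock_id].expires = value;
	demo_it[clock_id].incr = interval;
}

int main(void)
{
	demo_set(DEMO_CLOCK_PROF, 100, 10);
	demo_set(DEMO_CLOCK_VIRT, 200, 20);
	printf("prof: expires=%lu incr=%lu\n",
	       demo_it[DEMO_CLOCK_PROF].expires, demo_it[DEMO_CLOCK_PROF].incr);
	printf("virt: expires=%lu incr=%lu\n",
	       demo_it[DEMO_CLOCK_VIRT].expires, demo_it[DEMO_CLOCK_VIRT].incr);
	return 0;
}
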
Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-2-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 14 ++++- kernel/fork.c | 9 +-- kernel/itimer.c | 146 +++++++++++++++++++++------------------------- kernel/posix-cpu-timers.c | 98 +++++++++++++++---------------- 4 files changed, 130 insertions(+), 137 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4bb6b8..3b3efaddd953 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -470,6 +470,11 @@ struct pacct_struct { unsigned long ac_minflt, ac_majflt; }; +struct cpu_itimer { + cputime_t expires; + cputime_t incr; +}; + /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in &cputime_t units @@ -564,9 +569,12 @@ struct signal_struct { struct pid *leader_pid; ktime_t it_real_incr; - /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ - cputime_t it_prof_expires, it_virt_expires; - cputime_t it_prof_incr, it_virt_incr; + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; /* * Thread group totals for process CPU timers. diff --git a/kernel/fork.c b/kernel/fork.c index 29b532e718f7..893ab0bf5e39 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -790,10 +791,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) thread_group_cputime_init(sig); /* Expiration times and increments. */ - sig->it_virt_expires = cputime_zero; - sig->it_virt_incr = cputime_zero; - sig->it_prof_expires = cputime_zero; - sig->it_prof_incr = cputime_zero; + sig->it[CPUCLOCK_PROF].expires = cputime_zero; + sig->it[CPUCLOCK_PROF].incr = cputime_zero; + sig->it[CPUCLOCK_VIRT].expires = cputime_zero; + sig->it[CPUCLOCK_VIRT].incr = cputime_zero; /* Cached expiration times. 
*/ sig->cputime_expires.prof_exp = cputime_zero; diff --git a/kernel/itimer.c b/kernel/itimer.c index 58762f7077ec..852c88ddd1f0 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -41,10 +41,43 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) return ktime_to_timeval(rem); } +static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, + struct itimerval *value) +{ + cputime_t cval, cinterval; + struct cpu_itimer *it = &tsk->signal->it[clock_id]; + + spin_lock_irq(&tsk->sighand->siglock); + + cval = it->expires; + cinterval = it->incr; + if (!cputime_eq(cval, cputime_zero)) { + struct task_cputime cputime; + cputime_t t; + + thread_group_cputimer(tsk, &cputime); + if (clock_id == CPUCLOCK_PROF) + t = cputime_add(cputime.utime, cputime.stime); + else + /* CPUCLOCK_VIRT */ + t = cputime.utime; + + if (cputime_le(cval, t)) + /* about to fire */ + cval = jiffies_to_cputime(1); + else + cval = cputime_sub(cval, t); + } + + spin_unlock_irq(&tsk->sighand->siglock); + + cputime_to_timeval(cval, &value->it_value); + cputime_to_timeval(cinterval, &value->it_interval); +} + int do_getitimer(int which, struct itimerval *value) { struct task_struct *tsk = current; - cputime_t cinterval, cval; switch (which) { case ITIMER_REAL: @@ -55,44 +88,10 @@ int do_getitimer(int which, struct itimerval *value) spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_virt_expires; - cinterval = tsk->signal->it_virt_incr; - if (!cputime_eq(cval, cputime_zero)) { - struct task_cputime cputime; - cputime_t utime; - - thread_group_cputimer(tsk, &cputime); - utime = cputime.utime; - if (cputime_le(cval, utime)) { /* about to fire */ - cval = jiffies_to_cputime(1); - } else { - cval = cputime_sub(cval, utime); - } - } - spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + get_cpu_itimer(tsk, CPUCLOCK_VIRT, value); break; case ITIMER_PROF: - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_prof_expires; - cinterval = tsk->signal->it_prof_incr; - if (!cputime_eq(cval, cputime_zero)) { - struct task_cputime times; - cputime_t ptime; - - thread_group_cputimer(tsk, ×); - ptime = cputime_add(times.utime, times.stime); - if (cputime_le(cval, ptime)) { /* about to fire */ - cval = jiffies_to_cputime(1); - } else { - cval = cputime_sub(cval, ptime); - } - } - spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + get_cpu_itimer(tsk, CPUCLOCK_PROF, value); break; default: return(-EINVAL); @@ -128,6 +127,36 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, + struct itimerval *value, struct itimerval *ovalue) +{ + cputime_t cval, cinterval, nval, ninterval; + struct cpu_itimer *it = &tsk->signal->it[clock_id]; + + nval = timeval_to_cputime(&value->it_value); + ninterval = timeval_to_cputime(&value->it_interval); + + spin_lock_irq(&tsk->sighand->siglock); + + cval = it->expires; + cinterval = it->incr; + if (!cputime_eq(cval, cputime_zero) || + !cputime_eq(nval, cputime_zero)) { + if (cputime_gt(nval, cputime_zero)) + nval = cputime_add(nval, jiffies_to_cputime(1)); + set_process_cpu_timer(tsk, clock_id, &nval, &cval); + } + it->expires = nval; + it->incr = ninterval; + + spin_unlock_irq(&tsk->sighand->siglock); + + if 
(ovalue) { + cputime_to_timeval(cval, &ovalue->it_value); + cputime_to_timeval(cinterval, &ovalue->it_interval); + } +} + /* * Returns true if the timeval is in canonical form */ @@ -139,7 +168,6 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) struct task_struct *tsk = current; struct hrtimer *timer; ktime_t expires; - cputime_t cval, cinterval, nval, ninterval; /* * Validate the timevals in value. @@ -174,48 +202,10 @@ again: spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - nval = timeval_to_cputime(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_virt_expires; - cinterval = tsk->signal->it_virt_incr; - if (!cputime_eq(cval, cputime_zero) || - !cputime_eq(nval, cputime_zero)) { - if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, - jiffies_to_cputime(1)); - set_process_cpu_timer(tsk, CPUCLOCK_VIRT, - &nval, &cval); - } - tsk->signal->it_virt_expires = nval; - tsk->signal->it_virt_incr = ninterval; - spin_unlock_irq(&tsk->sighand->siglock); - if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); - } + set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue); break; case ITIMER_PROF: - nval = timeval_to_cputime(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_prof_expires; - cinterval = tsk->signal->it_prof_incr; - if (!cputime_eq(cval, cputime_zero) || - !cputime_eq(nval, cputime_zero)) { - if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, - jiffies_to_cputime(1)); - set_process_cpu_timer(tsk, CPUCLOCK_PROF, - &nval, &cval); - } - tsk->signal->it_prof_expires = nval; - tsk->signal->it_prof_incr = ninterval; - spin_unlock_irq(&tsk->sighand->siglock); - if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); - } + set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue); break; default: return -EINVAL; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bece7c0b67b2..9b2d5e4dc8c4 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -14,11 +14,11 @@ */ void update_rlimit_cpu(unsigned long rlim_new) { - cputime_t cputime; + cputime_t cputime = secs_to_cputime(rlim_new); + struct signal_struct *const sig = current->signal; - cputime = secs_to_cputime(rlim_new); - if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || - cputime_gt(current->signal->it_prof_expires, cputime)) { + if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || + cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { spin_lock_irq(¤t->sighand->siglock); set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); spin_unlock_irq(¤t->sighand->siglock); @@ -613,6 +613,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) break; } } else { + struct signal_struct *const sig = p->signal; + union cpu_time_count *exp = &timer->it.cpu.expires; + /* * For a process timer, set the cached expiration time. 
*/ @@ -620,30 +623,27 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) default: BUG(); case CPUCLOCK_VIRT: - if (!cputime_eq(p->signal->it_virt_expires, + if (!cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && - cputime_lt(p->signal->it_virt_expires, - timer->it.cpu.expires.cpu)) + cputime_lt(sig->it[CPUCLOCK_VIRT].expires, + exp->cpu)) break; - p->signal->cputime_expires.virt_exp = - timer->it.cpu.expires.cpu; + sig->cputime_expires.virt_exp = exp->cpu; break; case CPUCLOCK_PROF: - if (!cputime_eq(p->signal->it_prof_expires, + if (!cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && - cputime_lt(p->signal->it_prof_expires, - timer->it.cpu.expires.cpu)) + cputime_lt(sig->it[CPUCLOCK_PROF].expires, + exp->cpu)) break; - i = p->signal->rlim[RLIMIT_CPU].rlim_cur; + i = sig->rlim[RLIMIT_CPU].rlim_cur; if (i != RLIM_INFINITY && - i <= cputime_to_secs(timer->it.cpu.expires.cpu)) + i <= cputime_to_secs(exp->cpu)) break; - p->signal->cputime_expires.prof_exp = - timer->it.cpu.expires.cpu; + sig->cputime_expires.prof_exp = exp->cpu; break; case CPUCLOCK_SCHED: - p->signal->cputime_expires.sched_exp = - timer->it.cpu.expires.sched; + sig->cputime_expires.sched_exp = exp->sched; break; } } @@ -1070,6 +1070,27 @@ static void stop_process_timers(struct task_struct *tsk) spin_unlock_irqrestore(&cputimer->lock, flags); } +static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, + cputime_t *expires, cputime_t cur_time, int signo) +{ + if (cputime_eq(it->expires, cputime_zero)) + return; + + if (cputime_ge(cur_time, it->expires)) { + it->expires = it->incr; + if (!cputime_eq(it->expires, cputime_zero)) + it->expires = cputime_add(it->expires, cur_time); + + __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); + } + + if (!cputime_eq(it->expires, cputime_zero) && + (cputime_eq(*expires, cputime_zero) || + cputime_lt(it->expires, *expires))) { + *expires = it->expires; + } +} + /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers @@ -1089,10 +1110,10 @@ static void check_process_timers(struct task_struct *tsk, * Don't sample the current process CPU clocks if there are no timers. */ if (list_empty(&timers[CPUCLOCK_PROF]) && - cputime_eq(sig->it_prof_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && list_empty(&timers[CPUCLOCK_VIRT]) && - cputime_eq(sig->it_virt_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && list_empty(&timers[CPUCLOCK_SCHED])) { stop_process_timers(tsk); return; @@ -1152,38 +1173,11 @@ static void check_process_timers(struct task_struct *tsk, /* * Check for the special case process timers. */ - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - if (cputime_ge(ptime, sig->it_prof_expires)) { - /* ITIMER_PROF fires and reloads. */ - sig->it_prof_expires = sig->it_prof_incr; - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - sig->it_prof_expires = cputime_add( - sig->it_prof_expires, ptime); - } - __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_prof_expires, cputime_zero) && - (cputime_eq(prof_expires, cputime_zero) || - cputime_lt(sig->it_prof_expires, prof_expires))) { - prof_expires = sig->it_prof_expires; - } - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - if (cputime_ge(utime, sig->it_virt_expires)) { - /* ITIMER_VIRTUAL fires and reloads. 
*/ - sig->it_virt_expires = sig->it_virt_incr; - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - sig->it_virt_expires = cputime_add( - sig->it_virt_expires, utime); - } - __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero) && - (cputime_eq(virt_expires, cputime_zero) || - cputime_lt(sig->it_virt_expires, virt_expires))) { - virt_expires = sig->it_virt_expires; - } - } + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime, + SIGPROF); + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, + SIGVTALRM); + if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { unsigned long psecs = cputime_to_secs(ptime); cputime_t x; -- cgit v1.2.3 From 8356b5f9c424e5831715abbce747197c30d1fd71 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:27 +0200 Subject: itimers: Fix periodic ticks precision Measure the ITIMER_PROF and ITIMER_VIRT timer interval error between the real ticks and those requested by the user, and take it into account when scheduling the next tick. This patch introduces the possibility that the time between two consecutive ticks is smaller than the requested interval; however, it preserves the guarantee that the nth tick is generated no earlier than n*interval - counting from the beginning of periodic signal generation. Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-3-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/itimer.c | 24 +++++++++++++++++++++--- kernel/posix-cpu-timers.c | 20 +++++++++++++++++--- 3 files changed, 40 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3b3efaddd953..a069e65e8bb7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -473,6 +473,8 @@ struct pacct_struct { struct cpu_itimer { cputime_t expires; cputime_t incr; + u32 error; + u32 incr_error; }; /** diff --git a/kernel/itimer.c b/kernel/itimer.c index 852c88ddd1f0..21adff7b2a17 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -42,7 +42,7 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) } static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *value) + struct itimerval *const value) { cputime_t cval, cinterval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; @@ -127,14 +127,32 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns) +{ + struct timespec ts; + s64 cpu_ns; + + cputime_to_timespec(ct, &ts); + cpu_ns = timespec_to_ns(&ts); + + return (cpu_ns <= real_ns) ?
0 : cpu_ns - real_ns; +} + static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *value, struct itimerval *ovalue) + const struct itimerval *const value, + struct itimerval *const ovalue) { - cputime_t cval, cinterval, nval, ninterval; + cputime_t cval, nval, cinterval, ninterval; + s64 ns_ninterval, ns_nval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; nval = timeval_to_cputime(&value->it_value); + ns_nval = timeval_to_ns(&value->it_value); ninterval = timeval_to_cputime(&value->it_interval); + ns_ninterval = timeval_to_ns(&value->it_interval); + + it->incr_error = cputime_sub_ns(ninterval, ns_ninterval); + it->error = cputime_sub_ns(nval, ns_nval); spin_lock_irq(&tsk->sighand->siglock); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9b2d5e4dc8c4..b60d644ea4b3 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1070,6 +1070,8 @@ static void stop_process_timers(struct task_struct *tsk) spin_unlock_irqrestore(&cputimer->lock, flags); } +static u32 onecputick; + static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, cputime_t *expires, cputime_t cur_time, int signo) { @@ -1077,9 +1079,16 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, return; if (cputime_ge(cur_time, it->expires)) { - it->expires = it->incr; - if (!cputime_eq(it->expires, cputime_zero)) - it->expires = cputime_add(it->expires, cur_time); + if (!cputime_eq(it->incr, cputime_zero)) { + it->expires = cputime_add(it->expires, it->incr); + it->error += it->incr_error; + if (it->error >= onecputick) { + it->expires = cputime_sub(it->expires, + jiffies_to_cputime(1)); + it->error -= onecputick; + } + } else + it->expires = cputime_zero; __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); } @@ -1696,10 +1705,15 @@ static __init int init_posix_cpu_timers(void) .nsleep = thread_cpu_nsleep, .nsleep_restart = thread_cpu_nsleep_restart, }; + struct timespec ts; register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); + cputime_to_timespec(jiffies_to_cputime(1), &ts); + onecputick = ts.tv_nsec; + WARN_ON(ts.tv_sec != 0); + return 0; } __initcall(init_posix_cpu_timers); -- cgit v1.2.3 From a42548a18866e87092db93b771e6c5b060d78401 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:29 +0200 Subject: cputime: Optimize jiffies_to_cputime(1) For powerpc with CONFIG_VIRT_CPU_ACCOUNTING, jiffies_to_cputime(1) is not a compile-time constant and the run-time calculation is quite expensive. To optimize, we use a precomputed value. For all other architectures it is a preprocessor definition.
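The change reduces to caching one conversion at boot. A minimal sketch of the pattern (illustrative only; the authoritative changes are in the diff below, whose powerpc hook uses the same name):

	cputime_t cputime_one_jiffy;	/* computed once during time_init() */

	static inline void setup_cputime_one_jiffy(void)
	{
		/* jiffies_to_cputime() may be expensive at run time here
		 * (powerpc with CONFIG_VIRT_CPU_ACCOUNTING); every later
		 * tick just reads the cached value. */
		cputime_one_jiffy = jiffies_to_cputime(1);
	}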
Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-5-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/cputime.h | 1 + arch/powerpc/include/asm/cputime.h | 13 +++++++++++++ arch/powerpc/kernel/time.c | 4 ++++ arch/s390/include/asm/cputime.h | 1 + include/asm-generic/cputime.h | 1 + kernel/itimer.c | 4 ++-- kernel/posix-cpu-timers.c | 6 +++--- kernel/sched.c | 9 ++++----- 8 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index d20b998cb91d..7fa8a8594660 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -30,6 +30,7 @@ typedef u64 cputime_t; typedef u64 cputime64_t; #define cputime_zero ((cputime_t)0) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~((cputime_t)0) >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index f42e623030ee..fa19f3fe05ff 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -18,6 +18,9 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING #include +#ifdef __KERNEL__ +static inline void setup_cputime_one_jiffy(void) { } +#endif #else #include @@ -48,6 +51,11 @@ typedef u64 cputime64_t; #ifdef __KERNEL__ +/* + * One jiffy in timebase units computed during initialization + */ +extern cputime_t cputime_one_jiffy; + /* * Convert cputime <-> jiffies */ @@ -89,6 +97,11 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif) return ct; } +static inline void setup_cputime_one_jiffy(void) +{ + cputime_one_jiffy = jiffies_to_cputime(1); +} + static inline cputime64_t jiffies64_to_cputime64(const u64 jif) { cputime_t ct; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..211d7b0cd370 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -193,6 +193,8 @@ EXPORT_SYMBOL(__cputime_clockt_factor); DEFINE_PER_CPU(unsigned long, cputime_last_delta); DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); +cputime_t cputime_one_jiffy; + static void calc_cputime_factors(void) { struct div_result res; @@ -500,6 +502,7 @@ static int __init iSeries_tb_recal(void) tb_to_xs = divres.result_low; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; vdso_data->tb_to_xs = tb_to_xs; + setup_cputime_one_jiffy(); } else { printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" @@ -945,6 +948,7 @@ void __init time_init(void) tb_ticks_per_usec = ppc_tb_freq / 1000000; tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); calc_cputime_factors(); + setup_cputime_one_jiffy(); /* * Calculate the length of each tick in ns. 
It will not be diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 7a3817a656df..24b1244aadb9 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -42,6 +42,7 @@ __div(unsigned long long n, unsigned int base) #endif /* __s390x__ */ #define cputime_zero (0ULL) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~0UL >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 1c1fa422d18a..ca0f239f0e13 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -7,6 +7,7 @@ typedef unsigned long cputime_t; #define cputime_zero (0UL) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~0UL >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/kernel/itimer.c b/kernel/itimer.c index 21adff7b2a17..8078a32d3b10 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -64,7 +64,7 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, if (cputime_le(cval, t)) /* about to fire */ - cval = jiffies_to_cputime(1); + cval = cputime_one_jiffy; else cval = cputime_sub(cval, t); } @@ -161,7 +161,7 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, if (!cputime_eq(cval, cputime_zero) || !cputime_eq(nval, cputime_zero)) { if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, jiffies_to_cputime(1)); + nval = cputime_add(nval, cputime_one_jiffy); set_process_cpu_timer(tsk, clock_id, &nval, &cval); } it->expires = nval; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 69c92374355f..18bdde6f676f 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1086,7 +1086,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, it->error += it->incr_error; if (it->error >= onecputick) { it->expires = cputime_sub(it->expires, - jiffies_to_cputime(1)); + cputime_one_jiffy); it->error -= onecputick; } } else @@ -1461,7 +1461,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, if (!cputime_eq(*oldval, cputime_zero)) { if (cputime_le(*oldval, now.cpu)) { /* Just about to fire. 
*/ - *oldval = jiffies_to_cputime(1); + *oldval = cputime_one_jiffy; } else { *oldval = cputime_sub(*oldval, now.cpu); } @@ -1712,7 +1712,7 @@ static __init int init_posix_cpu_timers(void) register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); - cputime_to_timespec(jiffies_to_cputime(1), &ts); + cputime_to_timespec(cputime_one_jiffy, &ts); onecputick = ts.tv_nsec; WARN_ON(ts.tv_sec != 0); diff --git a/kernel/sched.c b/kernel/sched.c index 1b59e265273b..8f977d5cc515 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5031,17 +5031,16 @@ void account_idle_time(cputime_t cputime) */ void account_process_tick(struct task_struct *p, int user_tick) { - cputime_t one_jiffy = jiffies_to_cputime(1); - cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy); + cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); if (user_tick) - account_user_time(p, one_jiffy, one_jiffy_scaled); + account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) - account_system_time(p, HARDIRQ_OFFSET, one_jiffy, + account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, one_jiffy_scaled); else - account_idle_time(one_jiffy); + account_idle_time(cputime_one_jiffy); } /* -- cgit v1.2.3 From 8a4c47f346cc7a12d0897c05eb3cc1add26b487f Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:04 +0800 Subject: drm: Remove the unused prefix in DRM_DEBUG_KMS/DRIVER/MODE Currently we have to pass a prefix every time we use the macro definitions of DRM_DEBUG_KMS/DRM_DEBUG_DRIVER/MODE, which is not convenient. We should use DRM_NAME as the default prefix. So remove the prefix argument from the macro definitions of DRM_DEBUG_KMS/DRIVER/MODE. Signed-off-by: Zhao Yakui Acked-by: Ian Romanick Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 8 +++----- drivers/gpu/drm/i915/i915_dma.c | 35 +++++++++++++++-------------------- drivers/gpu/drm/i915/intel_lvds.c | 10 +++------- drivers/gpu/drm/i915/intel_sdvo.c | 35 ++++++++++++++++------------------- include/drm/drmP.h | 18 +++++++++--------- 5 files changed, 46 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index fd489d76fbbc..5eca2d5c5f23 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -40,7 +40,6 @@ #include "drm.h" #include "drm_crtc.h" -#define DRM_MODESET_DEBUG "drm_mode" /** * drm_mode_debug_printmodeline - debug print a mode * @dev: DRM device @@ -53,8 +52,8 @@ */ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) { - DRM_DEBUG_MODE(DRM_MODESET_DEBUG, - "Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x\n", + DRM_DEBUG_MODE("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " + "0x%x 0x%x\n", mode->base.id, mode->name, mode->vrefresh, mode->clock, mode->hdisplay, mode->hsync_start, mode->hsync_end, mode->htotal, @@ -819,8 +818,7 @@ void drm_mode_prune_invalid(struct drm_device *dev, list_del(&mode->head); if (verbose) { drm_mode_debug_printmodeline(mode); - DRM_DEBUG_MODE(DRM_MODESET_DEBUG, - "Not using %s mode %d\n", + DRM_DEBUG_MODE("Not using %s mode %d\n", mode->name, mode->status); } drm_mode_destroy(dev, mode); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8c4783180bf6..14625e146f18 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -33,8 +33,6 @@ #include "i915_drm.h" #include "i915_drv.h" -#define I915_DRV "i915_drv" -
/* Really want an OS-independent resettable timer. Would like to have * this loop run for (eg) 3 sec, but have the timer reset every time * the head pointer changes, so that EBUSY only happens if the ring @@ -101,7 +99,7 @@ static int i915_init_phys_hws(struct drm_device *dev) memset(dev_priv->hw_status_page, 0, PAGE_SIZE); I915_WRITE(HWS_PGA, dev_priv->dma_status_page); - DRM_DEBUG_DRIVER(I915_DRV, "Enabled hardware status page\n"); + DRM_DEBUG_DRIVER("Enabled hardware status page\n"); return 0; } @@ -187,8 +185,7 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init) master_priv->sarea_priv = (drm_i915_sarea_t *) ((u8 *)master_priv->sarea->handle + init->sarea_priv_offset); } else { - DRM_DEBUG_DRIVER(I915_DRV, - "sarea not found assuming DRI2 userspace\n"); + DRM_DEBUG_DRIVER("sarea not found assuming DRI2 userspace\n"); } if (init->ring_size != 0) { @@ -238,7 +235,7 @@ static int i915_dma_resume(struct drm_device * dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - DRM_DEBUG_DRIVER(I915_DRV, "%s\n", __func__); + DRM_DEBUG_DRIVER("%s\n", __func__); if (dev_priv->ring.map.handle == NULL) { DRM_ERROR("can not ioremap virtual address for" @@ -251,14 +248,14 @@ static int i915_dma_resume(struct drm_device * dev) DRM_ERROR("Can not find hardware status page\n"); return -EINVAL; } - DRM_DEBUG_DRIVER(I915_DRV, "hw status page @ %p\n", + DRM_DEBUG_DRIVER("hw status page @ %p\n", dev_priv->hw_status_page); if (dev_priv->status_gfx_addr != 0) I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); else I915_WRITE(HWS_PGA, dev_priv->dma_status_page); - DRM_DEBUG_DRIVER(I915_DRV, "Enabled hardware status page\n"); + DRM_DEBUG_DRIVER("Enabled hardware status page\n"); return 0; } @@ -552,7 +549,7 @@ static int i915_dispatch_flip(struct drm_device * dev) if (!master_priv->sarea_priv) return -EINVAL; - DRM_DEBUG_DRIVER(I915_DRV, "%s: page=%d pfCurrentPage=%d\n", + DRM_DEBUG_DRIVER("%s: page=%d pfCurrentPage=%d\n", __func__, dev_priv->current_page, master_priv->sarea_priv->pf_current_page); @@ -633,8 +630,7 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, return -EINVAL; } - DRM_DEBUG_DRIVER(I915_DRV, - "i915 batchbuffer, start %x used %d cliprects %d\n", + DRM_DEBUG_DRIVER("i915 batchbuffer, start %x used %d cliprects %d\n", batch->start, batch->used, batch->num_cliprects); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -681,8 +677,7 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, void *batch_data; int ret; - DRM_DEBUG_DRIVER(I915_DRV, - "i915 cmdbuffer, buf %p sz %d cliprects %d\n", + DRM_DEBUG_DRIVER("i915 cmdbuffer, buf %p sz %d cliprects %d\n", cmdbuf->buf, cmdbuf->sz, cmdbuf->num_cliprects); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -735,7 +730,7 @@ static int i915_flip_bufs(struct drm_device *dev, void *data, { int ret; - DRM_DEBUG_DRIVER(I915_DRV, "%s\n", __func__); + DRM_DEBUG_DRIVER("%s\n", __func__); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -778,7 +773,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = dev_priv->num_fence_regs - dev_priv->fence_reg_start; break; default: - DRM_DEBUG_DRIVER(I915_DRV, "Unknown parameter %d\n", + DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); return -EINVAL; } @@ -819,7 +814,7 @@ static int i915_setparam(struct drm_device *dev, void *data, dev_priv->fence_reg_start = param->value; break; default: - DRM_DEBUG_DRIVER(I915_DRV, "unknown parameter %d\n", + DRM_DEBUG_DRIVER("unknown parameter %d\n", param->param); return -EINVAL; } @@ -846,7 
+841,7 @@ static int i915_set_status_page(struct drm_device *dev, void *data, return 0; } - DRM_DEBUG("set status page addr 0x%08x\n", (u32)hws->addr); + DRM_DEBUG_DRIVER("set status page addr 0x%08x\n", (u32)hws->addr); dev_priv->status_gfx_addr = hws->addr & (0x1ffff<<12); @@ -868,9 +863,9 @@ static int i915_set_status_page(struct drm_device *dev, void *data, memset(dev_priv->hw_status_page, 0, PAGE_SIZE); I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); - DRM_DEBUG_DRIVER(I915_DRV, "load hws HWS_PGA with gfx mem 0x%x\n", + DRM_DEBUG_DRIVER("load hws HWS_PGA with gfx mem 0x%x\n", dev_priv->status_gfx_addr); - DRM_DEBUG_DRIVER(I915_DRV, "load hws at %p\n", + DRM_DEBUG_DRIVER("load hws at %p\n", dev_priv->hw_status_page); return 0; } @@ -1310,7 +1305,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv) { struct drm_i915_file_private *i915_file_priv; - DRM_DEBUG_DRIVER(I915_DRV, "\n"); + DRM_DEBUG_DRIVER("\n"); i915_file_priv = (struct drm_i915_file_private *) kmalloc(sizeof(*i915_file_priv), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ab38efffecf..b59c65d19d81 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -38,8 +38,6 @@ #include "i915_drv.h" #include -#define I915_LVDS "i915_lvds" - /* * the following four scaling options are defined. * #define DRM_MODE_SCALE_NON_GPU 0 @@ -673,8 +671,7 @@ static int intel_lvds_set_property(struct drm_connector *connector, struct drm_crtc *crtc = connector->encoder->crtc; struct intel_lvds_priv *lvds_priv = intel_output->dev_priv; if (value == DRM_MODE_SCALE_NON_GPU) { - DRM_DEBUG_KMS(I915_LVDS, - "non_GPU property is unsupported\n"); + DRM_DEBUG_KMS("non_GPU property is unsupported\n"); return 0; } if (lvds_priv->fitting_mode == value) { @@ -731,8 +728,7 @@ static const struct drm_encoder_funcs intel_lvds_enc_funcs = { static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id) { - DRM_DEBUG_KMS(I915_LVDS, - "Skipping LVDS initialization for %s\n", id->ident); + DRM_DEBUG_KMS("Skipping LVDS initialization for %s\n", id->ident); return 1; } @@ -1013,7 +1009,7 @@ out: return; failed: - DRM_DEBUG_KMS(I915_LVDS, "No LVDS modes found, disabling.\n"); + DRM_DEBUG_KMS("No LVDS modes found, disabling.\n"); if (intel_output->ddc_bus) intel_i2c_destroy(intel_output->ddc_bus); drm_connector_cleanup(connector); diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 4f0c30948bc4..abef69c8a49a 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -36,7 +36,6 @@ #include "intel_sdvo_regs.h" #undef SDVO_DEBUG -#define I915_SDVO "i915_sdvo" struct intel_sdvo_priv { u8 slave_addr; @@ -178,7 +177,7 @@ static bool intel_sdvo_read_byte(struct intel_output *intel_output, u8 addr, return true; } - DRM_DEBUG("i2c transfer returned %d\n", ret); + DRM_DEBUG_KMS("i2c transfer returned %d\n", ret); return false; } @@ -288,7 +287,7 @@ static void intel_sdvo_debug_write(struct intel_output *intel_output, u8 cmd, struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; int i; - DRM_DEBUG_KMS(I915_SDVO, "%s: W: %02X ", + DRM_DEBUG_KMS("%s: W: %02X ", SDVO_NAME(sdvo_priv), cmd); for (i = 0; i < args_len; i++) DRM_LOG_KMS("%02X ", ((u8 *)args)[i]); @@ -341,7 +340,7 @@ static void intel_sdvo_debug_response(struct intel_output *intel_output, struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; int i; - DRM_DEBUG_KMS(I915_SDVO, "%s: R: ", SDVO_NAME(sdvo_priv)); + 
DRM_DEBUG_KMS("%s: R: ", SDVO_NAME(sdvo_priv)); for (i = 0; i < response_len; i++) DRM_LOG_KMS("%02X ", ((u8 *)response)[i]); for (; i < 8; i++) @@ -658,10 +657,10 @@ static int intel_sdvo_get_clock_rate_mult(struct intel_output *intel_output) status = intel_sdvo_read_response(intel_output, &response, 1); if (status != SDVO_CMD_STATUS_SUCCESS) { - DRM_DEBUG("Couldn't get SDVO clock rate multiplier\n"); + DRM_DEBUG_KMS("Couldn't get SDVO clock rate multiplier\n"); return SDVO_CLOCK_RATE_MULT_1X; } else { - DRM_DEBUG("Current clock rate multiplier: %d\n", response); + DRM_DEBUG_KMS("Current clock rate multiplier: %d\n", response); } return response; @@ -942,14 +941,14 @@ static void intel_sdvo_set_tv_format(struct intel_output *output) format = &sdvo_priv->tv_format; memset(&unset, 0, sizeof(unset)); if (memcmp(format, &unset, sizeof(*format))) { - DRM_DEBUG("%s: Choosing default TV format of NTSC-M\n", + DRM_DEBUG_KMS("%s: Choosing default TV format of NTSC-M\n", SDVO_NAME(sdvo_priv)); format->ntsc_m = 1; intel_sdvo_write_cmd(output, SDVO_CMD_SET_TV_FORMAT, format, sizeof(*format)); status = intel_sdvo_read_response(output, NULL, 0); if (status != SDVO_CMD_STATUS_SUCCESS) - DRM_DEBUG("%s: Failed to set TV format\n", + DRM_DEBUG_KMS("%s: Failed to set TV format\n", SDVO_NAME(sdvo_priv)); } } @@ -1220,8 +1219,8 @@ static void intel_sdvo_dpms(struct drm_encoder *encoder, int mode) * a given it the status is a success, we succeeded. */ if (status == SDVO_CMD_STATUS_SUCCESS && !input1) { - DRM_DEBUG("First %s output reported failure to sync\n", - SDVO_NAME(sdvo_priv)); + DRM_DEBUG_KMS("First %s output reported failure to " + "sync\n", SDVO_NAME(sdvo_priv)); } if (0) @@ -1316,8 +1315,8 @@ static void intel_sdvo_restore(struct drm_connector *connector) intel_wait_for_vblank(dev); status = intel_sdvo_get_trained_inputs(intel_output, &input1, &input2); if (status == SDVO_CMD_STATUS_SUCCESS && !input1) - DRM_DEBUG("First %s output reported failure to sync\n", - SDVO_NAME(sdvo_priv)); + DRM_DEBUG_KMS("First %s output reported failure to " + "sync\n", SDVO_NAME(sdvo_priv)); } intel_sdvo_set_active_outputs(intel_output, sdvo_priv->save_active_outputs); @@ -1395,7 +1394,7 @@ int intel_sdvo_supports_hotplug(struct drm_connector *connector) u8 response[2]; u8 status; struct intel_output *intel_output; - DRM_DEBUG("\n"); + DRM_DEBUG_KMS("\n"); if (!connector) return 0; @@ -1460,7 +1459,7 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_ATTACHED_DISPLAYS, NULL, 0); status = intel_sdvo_read_response(intel_output, &response, 2); - DRM_DEBUG("SDVO response %d %d\n", response[0], response[1]); + DRM_DEBUG_KMS("SDVO response %d %d\n", response[0], response[1]); if (status != SDVO_CMD_STATUS_SUCCESS) return connector_status_unknown; @@ -1905,8 +1904,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) /* Read the regs to test if we can talk to the device */ for (i = 0; i < 0x40; i++) { if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) { - DRM_DEBUG_KMS(I915_SDVO, - "No SDVO device found on SDVO%c\n", + DRM_DEBUG_KMS("No SDVO device found on SDVO%c\n", output_device == SDVOB ? 
'B' : 'C'); goto err_i2c; } } @@ -1989,8 +1987,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) sdvo_priv->controlled_output = 0; memcpy (bytes, &sdvo_priv->caps.output_flags, 2); - DRM_DEBUG_KMS(I915_SDVO, - "%s: Unknown SDVO output type (0x%02x%02x)\n", + DRM_DEBUG_KMS("%s: Unknown SDVO output type (0x%02x%02x)\n", SDVO_NAME(sdvo_priv), bytes[0], bytes[1]); encoder_type = DRM_MODE_ENCODER_NONE; @@ -2022,7 +2019,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) &sdvo_priv->pixel_clock_max); - DRM_DEBUG_KMS(I915_SDVO, "%s device VID/DID: %02X:%02X.%02X, " + DRM_DEBUG_KMS("%s device VID/DID: %02X:%02X.%02X, " "clock range %dMHz - %dMHz, " "input 1: %c, input 2: %c, " "output 1: %c, output 2: %c\n", diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 45b67d9c39c1..edbdb02a7a3f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -174,19 +174,19 @@ extern void drm_ut_debug_printk(unsigned int request_level, __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_DRIVER(prefix, fmt, args...) \ +#define DRM_DEBUG_DRIVER(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_DRIVER, prefix, \ + drm_ut_debug_printk(DRM_UT_DRIVER, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_KMS(prefix, fmt, args...) \ +#define DRM_DEBUG_KMS(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_KMS, prefix, \ + drm_ut_debug_printk(DRM_UT_KMS, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_MODE(prefix, fmt, args...) \ +#define DRM_DEBUG_MODE(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_MODE, prefix, \ + drm_ut_debug_printk(DRM_UT_MODE, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) #define DRM_LOG(fmt, args...) \ do { \ drm_ut_debug_printk(DRM_UT_CORE, NULL, \ @@ -210,9 +210,9 @@ extern void drm_ut_debug_printk(unsigned int request_level, NULL, fmt, ##args); \ } while (0) #else -#define DRM_DEBUG_DRIVER(prefix, fmt, args...) do { } while (0) -#define DRM_DEBUG_KMS(prefix, fmt, args...) do { } while (0) -#define DRM_DEBUG_MODE(prefix, fmt, args...) do { } while (0) +#define DRM_DEBUG_DRIVER(fmt, args...) do { } while (0) +#define DRM_DEBUG_KMS(fmt, args...) do { } while (0) +#define DRM_DEBUG_MODE(fmt, args...) do { } while (0) #define DRM_DEBUG(fmt, arg...) do { } while (0) #define DRM_LOG(fmt, arg...) do { } while (0) #define DRM_LOG_KMS(fmt, args...) do { } while (0) -- cgit v1.2.3 From f940f37f022f7392ab81a35516222cbd46110b42 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:05 +0800 Subject: drm: Remove the macro definition of DRM_DEBUG_MODE Two macro definitions, DRM_DEBUG_KMS and DRM_DEBUG_MODE, can be used to add debug info related to KMS, which is confusing. So remove the macro definition of DRM_DEBUG_MODE; its uses can be replaced by DRM_DEBUG_KMS.
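At call sites the net effect of these patches is that the prefix argument disappears; a usage sketch (the strings are taken from the diffs above, the boot options from the debug-level explanation added in the next commit):

	/* before: DRM_DEBUG_KMS(I915_LVDS, "No LVDS modes found, disabling.\n"); */
	DRM_DEBUG_KMS("No LVDS modes found, disabling.\n");
	DRM_DEBUG_DRIVER("Enabled hardware status page\n");
	/* output is gated by the drm.debug bit mask at boot, e.g.
	 * drm.debug=0x02 (driver), drm.debug=0x04 (kms), 0x06 for both */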
Signed-off-by: Zhao Yakui Acked-by: Ian Romanick Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 4 ++-- include/drm/drmP.h | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 5eca2d5c5f23..6b4d2dc3cdd9 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -52,7 +52,7 @@ */ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) { - DRM_DEBUG_MODE("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " + DRM_DEBUG_KMS("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " "0x%x 0x%x\n", mode->base.id, mode->name, mode->vrefresh, mode->clock, mode->hdisplay, mode->hsync_start, @@ -818,7 +818,7 @@ void drm_mode_prune_invalid(struct drm_device *dev, list_del(&mode->head); if (verbose) { drm_mode_debug_printmodeline(mode); - DRM_DEBUG_MODE("Not using %s mode %d\n", + DRM_DEBUG_KMS("Not using %s mode %d\n", mode->name, mode->status); } drm_mode_destroy(dev, mode); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index edbdb02a7a3f..6513d16cd029 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -88,7 +88,6 @@ struct drm_device; #define DRM_UT_CORE 0x01 #define DRM_UT_DRIVER 0x02 #define DRM_UT_KMS 0x04 -#define DRM_UT_MODE 0x08 extern void drm_ut_debug_printk(unsigned int request_level, const char *prefix, @@ -184,11 +183,6 @@ extern void drm_ut_debug_printk(unsigned int request_level, drm_ut_debug_printk(DRM_UT_KMS, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_MODE(fmt, args...) \ - do { \ - drm_ut_debug_printk(DRM_UT_MODE, DRM_NAME, \ - __func__, fmt, ##args); \ - } while (0) #define DRM_LOG(fmt, args...) \ do { \ drm_ut_debug_printk(DRM_UT_CORE, NULL, \ @@ -212,7 +206,6 @@ extern void drm_ut_debug_printk(unsigned int request_level, #else #define DRM_DEBUG_DRIVER(fmt, args...) do { } while (0) #define DRM_DEBUG_KMS(fmt, args...) do { } while (0) -#define DRM_DEBUG_MODE(fmt, args...) do { } while (0) #define DRM_DEBUG(fmt, arg...) do { } while (0) #define DRM_LOG(fmt, arg...) do { } while (0) #define DRM_LOG_KMS(fmt, args...) do { } while (0) -- cgit v1.2.3 From 87fdff81cd2d770f0adc742e21eb5e062ad20def Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:06 +0800 Subject: DRM: Add the explanation about DRM debug levels Add an explanation of the DRM debug levels to the drmP header file, describing how and where to use each debug level. Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- include/drm/drmP.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 6513d16cd029..e0f1c1fee58b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -88,6 +88,37 @@ struct drm_device; #define DRM_UT_CORE 0x01 #define DRM_UT_DRIVER 0x02 #define DRM_UT_KMS 0x04 +/* + * Three debug levels are defined. + * drm_core, drm_driver, drm_kms + * drm_core level can be used in the generic drm code. For example: + * drm_ioctl, drm_mm, drm_memory + * The macro definiton of DRM_DEBUG is used. + * DRM_DEBUG(fmt, args...) + * The debug info by using the DRM_DEBUG can be obtained by adding + * the boot option of "drm.debug=1". + * + * drm_driver level can be used in the specific drm driver. It is used + * to add the debug info related with the drm driver. For example: + * i915_drv, i915_dma, i915_gem, radeon_drv, + * The macro definition of DRM_DEBUG_DRIVER can be used. + * DRM_DEBUG_DRIVER(fmt, args...)
+ * The debug info by using the DRM_DEBUG_DRIVER can be obtained by + * adding the boot option of "drm.debug=0x02" + * + * drm_kms level can be used in the KMS code related with specific drm driver. + * It is used to add the debug info related with KMS mode. For example: + * the connector/crtc , + * The macro definition of DRM_DEBUG_KMS can be used. + * DRM_DEBUG_KMS(fmt, args...) + * The debug info by using the DRM_DEBUG_KMS can be obtained by + * adding the boot option of "drm.debug=0x04" + * + * If we add the boot option of "drm.debug=0x06", we can get the debug info by + * using the DRM_DEBUG_KMS and DRM_DEBUG_DRIVER. + * If we add the boot option of "drm.debug=0x05", we can get the debug info by + * using the DRM_DEBUG_KMS and DRM_DEBUG. + */ extern void drm_ut_debug_printk(unsigned int request_level, const char *prefix, -- cgit v1.2.3 From 2066facca4c7dfe9f5068ece0200a4dbf10f49e1 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:17 +0200 Subject: drm/kms: slave encoder interface. Define some helper functions to make easier to detach a KMS encoder implementation from the drm module of the GPU it's used in. This is mainly useful for some external I2C encoders known to be present on cards with GPUs from several different manufacturers. Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_encoder_slave.c | 116 ++++++++++++++++++++++++++ include/drm/drm_encoder_slave.h | 162 ++++++++++++++++++++++++++++++++++++ 3 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/drm_encoder_slave.c create mode 100644 include/drm/drm_encoder_slave.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index fe23f29f7cba..5f0aec4f082a 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -11,7 +11,7 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \ drm_crtc.o drm_crtc_helper.o drm_modes.o drm_edid.o \ - drm_info.o drm_debugfs.o + drm_info.o drm_debugfs.o drm_encoder_slave.o drm-$(CONFIG_COMPAT) += drm_ioc32.o diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c new file mode 100644 index 000000000000..6ffd600ccfae --- /dev/null +++ b/drivers/gpu/drm/drm_encoder_slave.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2009 Francisco Jerez. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include + +/** + * drm_i2c_encoder_init - Initialize an I2C slave encoder + * @dev: DRM device. + * @encoder: Encoder to be attached to the I2C device. You aren't + * required to have called drm_encoder_init() before. + * @adap: I2C adapter that will be used to communicate with + * the device. + * @info: Information that will be used to create the I2C device. + * Required fields are @addr and @type. + * + * Create an I2C device on the specified bus (the module containing its + * driver is transparently loaded) and attach it to the specified + * &drm_encoder_slave. The @slave_funcs field will be initialized with + * the hooks provided by the slave driver. + * + * Returns 0 on success or a negative errno on failure, in particular, + * -ENODEV is returned when no matching driver is found. + */ +int drm_i2c_encoder_init(struct drm_device *dev, + struct drm_encoder_slave *encoder, + struct i2c_adapter *adap, + const struct i2c_board_info *info) +{ + char modalias[sizeof(I2C_MODULE_PREFIX) + + I2C_NAME_SIZE]; + struct module *module = NULL; + struct i2c_client *client; + struct drm_i2c_encoder_driver *encoder_drv; + int err = 0; + + snprintf(modalias, sizeof(modalias), + "%s%s", I2C_MODULE_PREFIX, info->type); + request_module(modalias); + + client = i2c_new_device(adap, info); + if (!client) { + err = -ENOMEM; + goto fail; + } + + if (!client->driver) { + err = -ENODEV; + goto fail_unregister; + } + + module = client->driver->driver.owner; + if (!try_module_get(module)) { + err = -ENODEV; + goto fail_unregister; + } + + encoder->bus_priv = client; + + encoder_drv = to_drm_i2c_encoder_driver(client->driver); + + err = encoder_drv->encoder_init(client, dev, encoder); + if (err) + goto fail_unregister; + + return 0; + +fail_unregister: + i2c_unregister_device(client); + module_put(module); +fail: + return err; +} +EXPORT_SYMBOL(drm_i2c_encoder_init); + +/** + * drm_i2c_encoder_destroy - Unregister the I2C device backing an encoder + * @drm_encoder: Encoder to be unregistered. + * + * This should be called from the @destroy method of an I2C slave + * encoder driver once I2C access is no longer needed. + */ +void drm_i2c_encoder_destroy(struct drm_encoder *drm_encoder) +{ + struct drm_encoder_slave *encoder = to_encoder_slave(drm_encoder); + struct i2c_client *client = drm_i2c_encoder_get_client(drm_encoder); + struct module *module = client->driver->driver.owner; + + i2c_unregister_device(client); + encoder->bus_priv = NULL; + + module_put(module); +} +EXPORT_SYMBOL(drm_i2c_encoder_destroy); diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h new file mode 100644 index 000000000000..821ec40c17d8 --- /dev/null +++ b/include/drm/drm_encoder_slave.h @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2009 Francisco Jerez. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DRM_ENCODER_SLAVE_H__ +#define __DRM_ENCODER_SLAVE_H__ + +#include +#include + +/** + * struct drm_encoder_slave_funcs - Entry points exposed by a slave encoder driver + * @set_config: Initialize any encoder-specific modesetting parameters. + * The meaning of the @params parameter is implementation + * dependent. It will usually be a structure with DVO port + * data format settings or timings. It's not required for + * the new parameters to take effect until the next mode + * is set. + * + * Most of its members are analogous to the function pointers in + * &drm_encoder_helper_funcs and they can optionally be used to + * initialize the latter. Connector-like methods (e.g. @get_modes and + * @set_property) will typically be wrapped around and only be called + * if the encoder is the currently selected one for the connector. + */ +struct drm_encoder_slave_funcs { + void (*set_config)(struct drm_encoder *encoder, + void *params); + + void (*destroy)(struct drm_encoder *encoder); + void (*dpms)(struct drm_encoder *encoder, int mode); + void (*save)(struct drm_encoder *encoder); + void (*restore)(struct drm_encoder *encoder); + bool (*mode_fixup)(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + int (*mode_valid)(struct drm_encoder *encoder, + struct drm_display_mode *mode); + void (*mode_set)(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + + enum drm_connector_status (*detect)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*get_modes)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*create_resources)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*set_property)(struct drm_encoder *encoder, + struct drm_connector *connector, + struct drm_property *property, + uint64_t val); + +}; + +/** + * struct drm_encoder_slave - Slave encoder struct + * @base: DRM encoder object. + * @slave_funcs: Slave encoder callbacks. + * @slave_priv: Slave encoder private data. + * @bus_priv: Bus specific data. + * + * A &drm_encoder_slave has two sets of callbacks, @slave_funcs and the + * ones in @base. 
The former are never actually called by the common + * CRTC code, it's just a convenience for splitting the encoder + * functions in an upper, GPU-specific layer and a (hopefully) + * GPU-agnostic lower layer: It's the GPU driver responsibility to + * call the slave methods when appropriate. + * + * drm_i2c_encoder_init() provides a way to get an implementation of + * this. + */ +struct drm_encoder_slave { + struct drm_encoder base; + + struct drm_encoder_slave_funcs *slave_funcs; + void *slave_priv; + void *bus_priv; +}; +#define to_encoder_slave(x) container_of((x), struct drm_encoder_slave, base) + +int drm_i2c_encoder_init(struct drm_device *dev, + struct drm_encoder_slave *encoder, + struct i2c_adapter *adap, + const struct i2c_board_info *info); + + +/** + * struct drm_i2c_encoder_driver + * + * Describes a device driver for an encoder connected to the GPU + * through an I2C bus. In addition to the entry points in @i2c_driver + * an @encoder_init function should be provided. It will be called to + * give the driver an opportunity to allocate any per-encoder data + * structures and to initialize the @slave_funcs and (optionally) + * @slave_priv members of @encoder. + */ +struct drm_i2c_encoder_driver { + struct i2c_driver i2c_driver; + + int (*encoder_init)(struct i2c_client *client, + struct drm_device *dev, + struct drm_encoder_slave *encoder); + +}; +#define to_drm_i2c_encoder_driver(x) container_of((x), \ + struct drm_i2c_encoder_driver, \ + i2c_driver) + +/** + * drm_i2c_encoder_get_client - Get the I2C client corresponding to an encoder + */ +static inline struct i2c_client *drm_i2c_encoder_get_client(struct drm_encoder *encoder) +{ + return (struct i2c_client *)to_encoder_slave(encoder)->bus_priv; +} + +/** + * drm_i2c_encoder_register - Register an I2C encoder driver + * @owner: Module containing the driver. + * @driver: Driver to be registered. + */ +static inline int drm_i2c_encoder_register(struct module *owner, + struct drm_i2c_encoder_driver *driver) +{ + return i2c_register_driver(owner, &driver->i2c_driver); +} + +/** + * drm_i2c_encoder_unregister - Unregister an I2C encoder driver + * @driver: Driver to be unregistered. + */ +static inline void drm_i2c_encoder_unregister(struct drm_i2c_encoder_driver *driver) +{ + return i2c_del_driver(&driver->i2c_driver); +} + +void drm_i2c_encoder_destroy(struct drm_encoder *encoder); + +#endif -- cgit v1.2.3 From 74bd3c26b90f39b9dcc05c471333da8998572b5d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:18 +0200 Subject: drm: Define DRM_MODE_CONNECTOR_TV The existing TV connector types are often unsuitable either because there is no way to probe them until they're actually plugged in or because they can change during run time (e.g. 7-pin DIN connectors that behave as S-Video, Component, Composite or SCART depending on the adaptor plugged in). 
Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 1 + drivers/gpu/drm/drm_sysfs.c | 3 +++ include/drm/drm_mode.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 9c758305472c..c7ab80b45e3f 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -146,6 +146,7 @@ static struct drm_conn_prop_enum_list drm_connector_enum_list[] = { DRM_MODE_CONNECTOR_DisplayPort, "DisplayPort", 0 }, { DRM_MODE_CONNECTOR_HDMIA, "HDMI Type A", 0 }, { DRM_MODE_CONNECTOR_HDMIB, "HDMI Type B", 0 }, + { DRM_MODE_CONNECTOR_TV, "TV", 0 }, }; static struct drm_prop_enum_list drm_encoder_enum_list[] = diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 85ec31b3ff00..adc179459c25 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -247,6 +247,7 @@ static ssize_t subconnector_show(struct device *device, case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: prop = dev->mode_config.tv_subconnector_property; is_tv = 1; break; @@ -287,6 +288,7 @@ static ssize_t select_subconnector_show(struct device *device, case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: prop = dev->mode_config.tv_select_subconnector_property; is_tv = 1; break; @@ -385,6 +387,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector) case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: for (i = 0; i < ARRAY_SIZE(connector_attrs_opt1); i++) { ret = device_create_file(&connector->kdev, &connector_attrs_opt1[i]); if (ret) diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index ae304cc73c90..c51e9f528c8f 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -155,6 +155,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_CONNECTOR_DisplayPort 10 #define DRM_MODE_CONNECTOR_HDMIA 11 #define DRM_MODE_CONNECTOR_HDMIB 12 +#define DRM_MODE_CONNECTOR_TV 13 struct drm_mode_get_connector { -- cgit v1.2.3 From aeaa1ad3ff32be833680e484d99ec29d892da1ff Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:19 +0200 Subject: drm: Define DRM_MODE_SUBCONNECTOR_SCART Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 2 ++ include/drm/drm_mode.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index c7ab80b45e3f..ed53c5c37ac4 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -108,6 +108,7 @@ static struct drm_prop_enum_list drm_tv_select_enum_list[] = { DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SVIDEO, "SVIDEO" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */ + { DRM_MODE_SUBCONNECTOR_SCART, "SCART" }, /* TV-out */ }; DRM_ENUM_NAME_FN(drm_get_tv_select_name, drm_tv_select_enum_list) @@ -118,6 +119,7 @@ static struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = { DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SVIDEO, "SVIDEO" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */ + { DRM_MODE_SUBCONNECTOR_SCART, "SCART" }, /* TV-out */ }; DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, diff --git 
a/include/drm/drm_mode.h b/include/drm/drm_mode.h index c51e9f528c8f..616aeb42b773 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -141,6 +141,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_SUBCONNECTOR_Composite 5 #define DRM_MODE_SUBCONNECTOR_SVIDEO 6 #define DRM_MODE_SUBCONNECTOR_Component 8 +#define DRM_MODE_SUBCONNECTOR_SCART 9 #define DRM_MODE_CONNECTOR_Unknown 0 #define DRM_MODE_CONNECTOR_VGA 1 -- cgit v1.2.3 From b6b7902e54c7e8abbc213d8bdc290350c00ccfe5 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:20 +0200 Subject: drm: Define some new standard TV properties. Namely "brightness", "contrast" and "flicker reduction". Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 18 ++++++++++++++++++ include/drm/drm_crtc.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index ed53c5c37ac4..a8c831134fc3 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -718,6 +718,24 @@ int drm_mode_create_tv_properties(struct drm_device *dev, int num_modes, drm_property_add_enum(dev->mode_config.tv_mode_property, i, i, modes[i]); + dev->mode_config.tv_brightness_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "brightness", 2); + dev->mode_config.tv_brightness_property->values[0] = 0; + dev->mode_config.tv_brightness_property->values[1] = 100; + + dev->mode_config.tv_contrast_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "contrast", 2); + dev->mode_config.tv_contrast_property->values[0] = 0; + dev->mode_config.tv_contrast_property->values[1] = 100; + + dev->mode_config.tv_flicker_reduction_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "flicker reduction", 2); + dev->mode_config.tv_flicker_reduction_property->values[0] = 0; + dev->mode_config.tv_flicker_reduction_property->values[1] = 100; + return 0; } EXPORT_SYMBOL(drm_mode_create_tv_properties); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 125994d8ac0b..5f2cc0ca4c7d 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -572,6 +572,9 @@ struct drm_mode_config { struct drm_property *tv_right_margin_property; struct drm_property *tv_top_margin_property; struct drm_property *tv_bottom_margin_property; + struct drm_property *tv_brightness_property; + struct drm_property *tv_contrast_property; + struct drm_property *tv_flicker_reduction_property; /* Optional properties */ struct drm_property *scaling_mode_property; -- cgit v1.2.3 From fa56d4cb4022c8b313c3b99236e1b87effc3655b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:29:11 +0000 Subject: ide: allow ide_dev_read_id() to be called from the IRQ context * Un-static __ide_wait_stat(). * Allow ide_dev_read_id() helper to be called from the IRQ context by adding irq_ctx flag and using mdelay()/__ide_wait_stat() when needed. * Switch ide_driveid_update() to set irq_ctx flag. This change is needed for the consecutive patch which fixes races in handling of user-space SET XFER commands but for improved bisectability and clarity it is better to do it in a separate patch. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-iops.c | 6 +++--- drivers/ide/ide-probe.c | 31 +++++++++++++++++++++---------- include/linux/ide.h | 3 ++- 3 files changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 2892b242bbe1..b99873845d21 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -102,8 +102,8 @@ EXPORT_SYMBOL(ide_fixstring); * setting a timer to wake up at half second intervals thereafter, * until timeout is achieved, before timing out. */ -static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, - unsigned long timeout, u8 *rstat) +int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, + unsigned long timeout, u8 *rstat) { ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; @@ -316,7 +316,7 @@ int ide_driveid_update(ide_drive_t *drive) return 0; SELECT_MASK(drive, 1); - rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id); + rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id, 1); SELECT_MASK(drive, 0); if (rc) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1bb106f6221a..8de442cbee94 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -238,6 +238,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) * @drive: drive to identify * @cmd: command to use * @id: buffer for IDENTIFY data + * @irq_ctx: flag set when called from the IRQ context * * Sends an ATA(PI) IDENTIFY request to a drive and waits for a response. * @@ -246,7 +247,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) * 2 device aborted the command (refused to identify itself) */ -int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) +int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id, int irq_ctx) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; @@ -263,7 +264,10 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); /* take a deep breath */ - msleep(50); + if (irq_ctx) + mdelay(50); + else + msleep(50); if (io_ports->ctl_addr && (hwif->host_flags & IDE_HFLAG_BROKEN_ALTSTATUS) == 0) { @@ -295,12 +299,19 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) timeout = ((cmd == ATA_CMD_ID_ATA) ? 
WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - if (ide_busy_sleep(drive, timeout, use_altstatus)) - return 1; - /* wait for IRQ and ATA_DRQ */ - msleep(50); - s = tp_ops->read_status(hwif); + if (irq_ctx) { + rc = __ide_wait_stat(drive, ATA_DRQ, BAD_R_STAT, timeout, &s); + if (rc) + return 1; + } else { + rc = ide_busy_sleep(drive, timeout, use_altstatus); + if (rc) + return 1; + + msleep(50); + s = tp_ops->read_status(hwif); + } if (OK_STAT(s, ATA_DRQ, BAD_R_STAT)) { /* drive returned ID */ @@ -406,10 +417,10 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (OK_STAT(stat, ATA_DRDY, ATA_BUSY) || present || cmd == ATA_CMD_ID_ATAPI) { - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); if (rc) /* failed: try again */ - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); stat = tp_ops->read_status(hwif); @@ -424,7 +435,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) msleep(50); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); (void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0); - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); } /* ensure drive IRQ is clear */ diff --git a/include/linux/ide.h b/include/linux/ide.h index edc93a6d931d..cb6cd0459a5e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1081,6 +1081,7 @@ extern void ide_fixstring(u8 *, const int, const int); int ide_busy_sleep(ide_drive_t *, unsigned long, int); +int __ide_wait_stat(ide_drive_t *, u8, u8, unsigned long, u8 *); int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); ide_startstop_t ide_do_park_unpark(ide_drive_t *, struct request *); @@ -1169,7 +1170,7 @@ int ide_no_data_taskfile(ide_drive_t *, struct ide_cmd *); int ide_taskfile_ioctl(ide_drive_t *, unsigned long); -int ide_dev_read_id(ide_drive_t *, u8, u16 *); +int ide_dev_read_id(ide_drive_t *, u8, u16 *, int); extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); -- cgit v1.2.3 From 665d66e8fad60a5a162c4615f27f916ad1a6d567 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:35:51 +0000 Subject: ide: fix races in handling of user-space SET XFER commands * Make cmd->tf_flags field 'u16' and add IDE_TFLAG_SET_XFER taskfile flag. * Update ide_finish_cmd() to set xfer / re-read id if the new flag is set. * Convert set_xfer_rate() (write handler for /proc/ide/hd?/current_speed) and ide_cmd_ioctl() (HDIO_DRIVE_CMD ioctl handler) to use the new flag. * Remove no longer needed disable_irq_nosync() + enable_irq() from ide_config_drive_speed(). Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-ioctls.c | 8 ++------ drivers/ide/ide-iops.c | 10 ---------- drivers/ide/ide-proc.c | 10 ++-------- drivers/ide/ide-taskfile.c | 9 ++++++++- include/linux/ide.h | 3 ++- 5 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index e246d3d3fbcc..d3440b5010a5 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -167,6 +167,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) err = -EINVAL; goto abort; } + + cmd.tf_flags |= IDE_TFLAG_SET_XFER; } err = ide_raw_taskfile(drive, &cmd, buf, args[3]); @@ -174,12 +176,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) args[0] = tf->status; args[1] = tf->error; args[2] = tf->nsect; - - if (!err && xfer_rate) { - /* active-retuning-calls future */ - ide_set_xfer_rate(drive, xfer_rate); - ide_driveid_update(drive); - } abort: if (copy_to_user((void __user *)arg, &args, 4)) err = -EFAULT; diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index b99873845d21..b14fa9a87c49 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -363,14 +363,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) * this point (lost interrupt). */ - /* - * FIXME: we race against the running IRQ here if - * this is called from non IRQ context. If we use - * disable_irq() we hang on the error path. Work - * is needed. - */ - disable_irq_nosync(hwif->irq); - udelay(1); tp_ops->dev_select(drive); SELECT_MASK(drive, 1); @@ -394,8 +386,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) SELECT_MASK(drive, 0); - enable_irq(hwif->irq); - if (error) { (void) ide_dump_status(drive, "set_drive_speed_status", stat); return error; diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 3242698832a4..021de41655e6 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -195,7 +195,6 @@ ide_devset_get(xfer_rate, current_speed); static int set_xfer_rate (ide_drive_t *drive, int arg) { struct ide_cmd cmd; - int err; if (arg < XFER_PIO_0 || arg > XFER_UDMA_6) return -EINVAL; @@ -206,14 +205,9 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) cmd.tf.nsect = (u8)arg; cmd.valid.out.tf = IDE_VALID_FEATURE | IDE_VALID_NSECT; cmd.valid.in.tf = IDE_VALID_NSECT; + cmd.tf_flags = IDE_TFLAG_SET_XFER; - err = ide_no_data_taskfile(drive, &cmd); - - if (!err) { - ide_set_xfer_rate(drive, (u8) arg); - ide_driveid_update(drive); - } - return err; + return ide_no_data_taskfile(drive, &cmd); } ide_devset_rw(current_speed, xfer_rate); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 50336d51eebc..cc8633cbe133 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -324,10 +324,17 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat) { struct request *rq = drive->hwif->rq; - u8 err = ide_read_error(drive); + u8 err = ide_read_error(drive), nsect = cmd->tf.nsect; + u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER); ide_complete_cmd(drive, cmd, stat, err); rq->errors = err; + + if (err == 0 && set_xfer) { + ide_set_xfer_rate(drive, nsect); + ide_driveid_update(drive); + } + ide_complete_rq(drive, err ? 
-EIO : 0, blk_rq_bytes(rq)); } diff --git a/include/linux/ide.h b/include/linux/ide.h index cb6cd0459a5e..803c1ae31237 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -258,6 +258,7 @@ enum { IDE_TFLAG_DYN = (1 << 5), IDE_TFLAG_FS = (1 << 6), IDE_TFLAG_MULTI_PIO = (1 << 7), + IDE_TFLAG_SET_XFER = (1 << 8), }; enum { @@ -294,7 +295,7 @@ struct ide_cmd { } out, in; } valid; - u8 tf_flags; + u16 tf_flags; u8 ftf_flags; /* for TASKFILE ioctl */ int protocol; -- cgit v1.2.3 From a75f0236292a5fca65f26efedca48bd07db1834d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 12 Aug 2009 02:30:10 +0200 Subject: drm: Add more standard TV properties. Overscan, saturation, hue. Used in the nouveau driver for GPUs with integrated TV encoders. Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 18 ++++++++++++++++++ include/drm/drm_crtc.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index a8c831134fc3..362a538cdedc 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -736,6 +736,24 @@ int drm_mode_create_tv_properties(struct drm_device *dev, int num_modes, dev->mode_config.tv_flicker_reduction_property->values[0] = 0; dev->mode_config.tv_flicker_reduction_property->values[1] = 100; + dev->mode_config.tv_overscan_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "overscan", 2); + dev->mode_config.tv_overscan_property->values[0] = 0; + dev->mode_config.tv_overscan_property->values[1] = 100; + + dev->mode_config.tv_saturation_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "saturation", 2); + dev->mode_config.tv_saturation_property->values[0] = 0; + dev->mode_config.tv_saturation_property->values[1] = 100; + + dev->mode_config.tv_hue_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "hue", 2); + dev->mode_config.tv_hue_property->values[0] = 0; + dev->mode_config.tv_hue_property->values[1] = 100; + return 0; } EXPORT_SYMBOL(drm_mode_create_tv_properties); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 5f2cc0ca4c7d..db92a83f8ca9 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -575,6 +575,9 @@ struct drm_mode_config { struct drm_property *tv_brightness_property; struct drm_property *tv_contrast_property; struct drm_property *tv_flicker_reduction_property; + struct drm_property *tv_overscan_property; + struct drm_property *tv_saturation_property; + struct drm_property *tv_hue_property; /* Optional properties */ struct drm_property *scaling_mode_property; -- cgit v1.2.3 From 00ae4064b1445524752575dd84df227c0687c99d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: rename 4k first chunk allocator to page Page size isn't always 4k depending on arch and configuration. Rename 4k first chunk allocator to page. Signed-off-by: Tejun Heo Cc: David Howells --- Documentation/kernel-parameters.txt | 2 +- arch/x86/kernel/setup_percpu.c | 23 ++++++++++++----------- include/linux/percpu.h | 2 +- mm/percpu.c | 25 ++++++++++++++----------- 4 files changed, 28 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7936b801fe6a..12e9eb77ee0d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1920,7 +1920,7 @@ and is between 256 and 4096 characters. 
It is defined in the file See arch/parisc/kernel/pdc_chassis.c percpu_alloc= [X86] Select which percpu first chunk allocator to use. - Allowed values are one of "lpage", "embed" and "4k". + Allowed values are one of "lpage", "embed" and "page". See comments in arch/x86/kernel/setup_percpu.c for details on each allocator. This parameter is primarily for debugging and performance comparison. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a26ff61e2fb0..1e17711c29d6 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -249,21 +249,22 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k allocator + * Page allocator * - * Boring fallback 4k allocator. This allocator puts more pressure on - * PTE TLBs but other than that behaves nicely on both UMA and NUMA. + * Boring fallback 4k page allocator. This allocator puts more + * pressure on PTE TLBs but other than that behaves nicely on both UMA + * and NUMA. */ -static void __init pcpu4k_populate_pte(unsigned long addr) +static void __init pcpup_populate_pte(unsigned long addr) { populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_4k(size_t static_size) +static ssize_t __init setup_pcpu_page(size_t static_size) { - return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - pcpu_fc_alloc, pcpu_fc_free, - pcpu4k_populate_pte); + return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpup_populate_pte); } /* for explicit first chunk allocator selection */ @@ -307,7 +308,7 @@ void __init setup_per_cpu_areas(void) */ ret = -EINVAL; if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "4k")) { + if (strcmp(pcpu_chosen_alloc, "page")) { if (!strcmp(pcpu_chosen_alloc, "lpage")) ret = setup_pcpu_lpage(static_size, true); else if (!strcmp(pcpu_chosen_alloc, "embed")) @@ -317,7 +318,7 @@ void __init setup_per_cpu_areas(void) "specified\n", pcpu_chosen_alloc); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " - "falling back to 4k\n", + "falling back to page size\n", pcpu_chosen_alloc, ret); } } else { @@ -326,7 +327,7 @@ void __init setup_per_cpu_areas(void) ret = setup_pcpu_embed(static_size, false); } if (ret < 0) - ret = setup_pcpu_4k(static_size); + ret = setup_pcpu_page(static_size); if (ret < 0) panic("cannot allocate static percpu area (%zu bytes, err=%zd)", static_size, ret); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e134c8229631..7989f61b03f3 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -74,7 +74,7 @@ extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); -extern ssize_t __init pcpu_4k_first_chunk( +extern ssize_t __init pcpu_page_first_chunk( size_t static_size, size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, diff --git a/mm/percpu.c b/mm/percpu.c index cbddcbdab681..6feac7934904 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1497,15 +1497,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, } /** - * pcpu_4k_first_chunk - map the first chunk using PAGE_SIZE pages + * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE * @free_fn: funtion to free percpu page, always called with PAGE_SIZE * 
@populate_pte_fn: function to populate pte * - * This is a helper to ease setting up embedded first percpu chunk and - * can be called where pcpu_setup_first_chunk() is expected. + * This is a helper to ease setting up page-remapped first percpu + * chunk and can be called where pcpu_setup_first_chunk() is expected. * * This is the basic allocator. Static percpu area is allocated * page-by-page into vmalloc area. @@ -1514,12 +1514,13 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +ssize_t __init pcpu_page_first_chunk(size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; @@ -1527,6 +1528,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, int i, j; ssize_t ret; + snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); + unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, PCPU_MIN_UNIT_SIZE)); @@ -1542,8 +1545,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, ptr = alloc_fn(cpu, PAGE_SIZE); if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); + pr_warning("PERCPU: failed to allocate %s page " + "for cpu%u\n", psize_str, cpu); goto enomem; } pages[j++] = virt_to_page(ptr); @@ -1580,8 +1583,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, } /* we're ready, commit */ - pr_info("PERCPU: %d 4k pages/cpu @%p s%zu r%zu\n", - unit_pages, vm.addr, static_size, reserved_size); + pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu\n", + unit_pages, psize_str, vm.addr, static_size, reserved_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, unit_pages << PAGE_SHIFT, vm.addr, NULL); -- cgit v1.2.3 From 08fc45806103e59a37418e84719b878f9bb32540 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: build first chunk allocators selectively There's no need to build unused first chunk allocators in. Define CONFIG_NEED_PER_CPU_*_FIRST_CHUNK and let archs enable them selectively. 
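For illustration only (not part of this patch), the effect of the new
config symbols can be mimicked in a freestanding C program: an allocator
is compiled in, and considered by the fallback logic, only when its
NEED_* macro is defined. All macro and function names below are made up
for the sketch.

  #include <stdio.h>

  #define NEED_EMBED_FIRST_CHUNK 1	/* arch "selects" embed */
  /* NEED_PAGE_FIRST_CHUNK left undefined: page allocator not built */

  #ifdef NEED_EMBED_FIRST_CHUNK
  static long embed_first_chunk(void)
  {
  	return 4096;	/* pretend unit size */
  }
  #endif

  int main(void)
  {
  	long ret = -22;	/* -EINVAL until an allocator succeeds */

  #ifdef NEED_EMBED_FIRST_CHUNK
  	ret = embed_first_chunk();
  #endif
  #ifdef NEED_PAGE_FIRST_CHUNK
  	if (ret < 0)
  		ret = page_first_chunk();
  #endif
  	printf("first chunk setup: %ld\n", ret);
  	return ret < 0;
  }

Archs that never select a symbol pay no text or data cost for the
corresponding allocator, which is the whole point of the change.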
Signed-off-by: Tejun Heo --- arch/x86/Kconfig | 10 ++++++++++ include/linux/percpu.h | 27 +++++---------------------- mm/percpu.c | 19 +++++++++++-------- 3 files changed, 26 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e06b2eeff9f2..f7ac27215512 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -150,6 +150,16 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_LPAGE_FIRST_CHUNK + def_bool y + depends on NEED_MULTIPLE_NODES + config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 7989f61b03f3..e26788e0da4a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -70,17 +70,21 @@ extern size_t __init pcpu_setup_first_chunk( ssize_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map); +#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); +#endif +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern ssize_t __init pcpu_page_first_chunk( size_t static_size, size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); +#endif -#ifdef CONFIG_NEED_MULTIPLE_NODES +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern int __init pcpu_lpage_build_unit_map( size_t static_size, size_t reserved_size, ssize_t *dyn_sizep, size_t *unit_sizep, @@ -98,27 +102,6 @@ extern ssize_t __init pcpu_lpage_first_chunk( extern void *pcpu_lpage_remapped(void *kaddr); #else -static inline int pcpu_lpage_build_unit_map( - size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) -{ - return -EINVAL; -} - -static inline ssize_t __init pcpu_lpage_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) -{ - return -EINVAL; -} - static inline void *pcpu_lpage_remapped(void *kaddr) { return NULL; diff --git a/mm/percpu.c b/mm/percpu.c index 6feac7934904..7971997de310 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1414,8 +1414,9 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, return pcpu_unit_size; } -static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep) +static inline size_t pcpu_calc_fc_sizes(size_t static_size, + size_t reserved_size, + ssize_t *dyn_sizep) { size_t size_sum; @@ -1427,6 +1428,8 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, return size_sum; } +#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ + !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @static_size: the size of static percpu area in bytes @@ -1495,7 +1498,10 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, unit_size, base, NULL); } +#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || + !CONFIG_HAVE_SETUP_PER_CPU_AREA */ +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE 
pages * @static_size: the size of static percpu area in bytes @@ -1598,12 +1604,9 @@ out_free_ar: free_bootmem(__pa(pages), pages_size); return ret; } +#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ -/* - * Large page remapping first chunk setup helper - */ -#ifdef CONFIG_NEED_MULTIPLE_NODES - +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** * pcpu_lpage_build_unit_map - build unit_map for large page remapping * @static_size: the size of static percpu area in bytes @@ -1982,7 +1985,7 @@ void *pcpu_lpage_remapped(void *kaddr) return NULL; } -#endif +#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */ /* * Generic percpu area setup. -- cgit v1.2.3 From f58dc01ba2ca9fe3ab2ba4ca43d9c8a735cf62d8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: generalize first chunk allocator selection Now that all first chunk allocators are in mm/percpu.c, it makes sense to make generalize percpu_alloc kernel parameter. Define PCPU_FC_* and set pcpu_chosen_fc using early_param() in mm/percpu.c. Arch code can use the set value to determine which first chunk allocator to use. Signed-off-by: Tejun Heo --- Documentation/kernel-parameters.txt | 11 ++++++----- arch/x86/kernel/setup_percpu.c | 24 ++++++------------------ include/linux/percpu.h | 12 ++++++++++++ mm/percpu.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 12e9eb77ee0d..dee9ce2e6cfa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1919,11 +1919,12 @@ and is between 256 and 4096 characters. It is defined in the file Format: { 0 | 1 } See arch/parisc/kernel/pdc_chassis.c - percpu_alloc= [X86] Select which percpu first chunk allocator to use. - Allowed values are one of "lpage", "embed" and "page". - See comments in arch/x86/kernel/setup_percpu.c for - details on each allocator. This parameter is primarily - for debugging and performance comparison. + percpu_alloc= Select which percpu first chunk allocator to use. + Currently supported values are "embed", "page" and + "lpage". Archs may support subset or none of the + selections. See comments in mm/percpu.c for details + on each allocator. This parameter is primarily for + debugging and performance comparison. pf. [PARIDE] See Documentation/blockdev/paride.txt. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1e17711c29d6..b961d99e6416 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -267,16 +267,6 @@ static ssize_t __init setup_pcpu_page(size_t static_size) pcpup_populate_pte); } -/* for explicit first chunk allocator selection */ -static char pcpu_chosen_alloc[16] __initdata; - -static int __init percpu_alloc_setup(char *str) -{ - strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); - return 0; -} -early_param("percpu_alloc", percpu_alloc_setup); - static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -307,19 +297,17 @@ void __init setup_per_cpu_areas(void) * each allocator for details. 
*/ ret = -EINVAL; - if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "page")) { - if (!strcmp(pcpu_chosen_alloc, "lpage")) + if (pcpu_chosen_fc != PCPU_FC_AUTO) { + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + if (pcpu_chosen_fc == PCPU_FC_LPAGE) ret = setup_pcpu_lpage(static_size, true); - else if (!strcmp(pcpu_chosen_alloc, "embed")) - ret = setup_pcpu_embed(static_size, true); else - pr_warning("PERCPU: unknown allocator %s " - "specified\n", pcpu_chosen_alloc); + ret = setup_pcpu_embed(static_size, true); + if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " "falling back to page size\n", - pcpu_chosen_alloc, ret); + pcpu_fc_names[pcpu_chosen_fc], ret); } } else { ret = setup_pcpu_lpage(static_size, false); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e26788e0da4a..9be05cbe5ee0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,6 +59,18 @@ extern void *pcpu_base_addr; extern const int *pcpu_unit_map; +enum pcpu_fc { + PCPU_FC_AUTO, + PCPU_FC_EMBED, + PCPU_FC_PAGE, + PCPU_FC_LPAGE, + + PCPU_FC_NR, +}; +extern const char *pcpu_fc_names[PCPU_FC_NR]; + +extern enum pcpu_fc pcpu_chosen_fc; + typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); diff --git a/mm/percpu.c b/mm/percpu.c index 7971997de310..7fb40bb1555a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1414,6 +1414,38 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, return pcpu_unit_size; } +const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { + [PCPU_FC_AUTO] = "auto", + [PCPU_FC_EMBED] = "embed", + [PCPU_FC_PAGE] = "page", + [PCPU_FC_LPAGE] = "lpage", +}; + +enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; + +static int __init percpu_alloc_setup(char *str) +{ + if (0) + /* nada */; +#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK + else if (!strcmp(str, "embed")) + pcpu_chosen_fc = PCPU_FC_EMBED; +#endif +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK + else if (!strcmp(str, "page")) + pcpu_chosen_fc = PCPU_FC_PAGE; +#endif +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK + else if (!strcmp(str, "lpage")) + pcpu_chosen_fc = PCPU_FC_LPAGE; +#endif + else + pr_warning("PERCPU: unknown allocator %s specified\n", str); + + return 0; +} +early_param("percpu_alloc", percpu_alloc_setup); + static inline size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, ssize_t *dyn_sizep) -- cgit v1.2.3 From 9a7737691e90d3cce0e5248f91826c50e5aa3fcf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: drop @static_size from first chunk allocators First chunk allocators assume percpu areas have been linked using one of PERCPU_*() macros and depend on __per_cpu_load symbol defined by those macros, so there isn't much point in passing in static area size explicitly when it can be easily calculated from __per_cpu_start and __per_cpu_end. Drop @static_size from all percpu first chunk allocators and helpers. 
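The point of the change is that the static size is derivable, so passing
it around is redundant. A standalone sketch of the idiom the allocators
now use internally, with an ordinary array standing in for the
linker-provided __per_cpu_start/__per_cpu_end boundary symbols:

  #include <stdio.h>

  /* stand-ins for the boundary symbols emitted by the PERCPU_*() macros */
  static char percpu_section[8192];
  #define __per_cpu_start (&percpu_section[0])
  #define __per_cpu_end   (&percpu_section[sizeof(percpu_section)])

  int main(void)
  {
  	/* the same expression each allocator now evaluates for itself */
  	const size_t static_size = __per_cpu_end - __per_cpu_start;

  	printf("static percpu area: %zu bytes\n", static_size);
  	return 0;
  }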
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 34 +++++++++++++++------------------- include/linux/percpu.h | 18 ++++++++---------- mm/percpu.c | 29 +++++++++++++---------------- 3 files changed, 36 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b961d99e6416..8aad486c688f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,7 +157,7 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; @@ -184,8 +184,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -ENOMEM; } - ret = pcpu_lpage_build_unit_map(static_size, - PERCPU_FIRST_CHUNK_RESERVE, + ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, &dyn_size, &unit_size, PMD_SIZE, unit_map, pcpu_lpage_cpu_distance); if (ret < 0) { @@ -208,9 +207,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) } } - ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - dyn_size, unit_size, PMD_SIZE, - unit_map, nr_units, + ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + unit_size, PMD_SIZE, unit_map, nr_units, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: if (ret < 0) @@ -218,7 +216,7 @@ out_free: return ret; } #else -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -232,7 +230,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. 
*/ -static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -244,7 +242,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) if (!chosen && (!cpu_has_pse || pcpu_need_numa())) return -EINVAL; - return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, reserve - PERCPU_FIRST_CHUNK_RESERVE); } @@ -260,9 +258,9 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(size_t static_size) +static ssize_t __init setup_pcpu_page(void) { - return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, pcpup_populate_pte); } @@ -282,7 +280,6 @@ static inline void setup_percpu_segment(int cpu) void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; unsigned int cpu; unsigned long delta; size_t pcpu_unit_size; @@ -300,9 +297,9 @@ void __init setup_per_cpu_areas(void) if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(static_size, true); + ret = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(static_size, true); + ret = setup_pcpu_embed(true); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " @@ -310,15 +307,14 @@ void __init setup_per_cpu_areas(void) pcpu_fc_names[pcpu_chosen_fc], ret); } } else { - ret = setup_pcpu_lpage(static_size, false); + ret = setup_pcpu_lpage(false); if (ret < 0) - ret = setup_pcpu_embed(static_size, false); + ret = setup_pcpu_embed(false); } if (ret < 0) - ret = setup_pcpu_page(static_size); + ret = setup_pcpu_page(); if (ret < 0) - panic("cannot allocate static percpu area (%zu bytes, err=%zd)", - static_size, ret); + panic("cannot initialize percpu area (err=%zd)", ret); pcpu_unit_size = ret; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9be05cbe5ee0..be2fc8fb9b6f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -84,13 +84,12 @@ extern size_t __init pcpu_setup_first_chunk( #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( - size_t static_size, size_t reserved_size, - ssize_t dyn_size); + size_t reserved_size, ssize_t dyn_size); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern ssize_t __init pcpu_page_first_chunk( - size_t static_size, size_t reserved_size, + size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); @@ -98,16 +97,15 @@ extern ssize_t __init pcpu_page_first_chunk( #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern int __init pcpu_lpage_build_unit_map( - size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, + size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); extern ssize_t __init pcpu_lpage_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, 
pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 7fb40bb1555a..e2ac58a39bb2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1464,7 +1464,6 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @@ -1489,9 +1488,9 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size) +ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { + const size_t static_size = __per_cpu_end - __per_cpu_start; size_t size_sum, unit_size, chunk_size; void *base; unsigned int cpu; @@ -1536,7 +1535,6 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE * @free_fn: funtion to free percpu page, always called with PAGE_SIZE @@ -1552,12 +1550,13 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_page_first_chunk(size_t static_size, size_t reserved_size, +ssize_t __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + const size_t static_size = __per_cpu_end - __per_cpu_start; char psize_str[16]; int unit_pages; size_t pages_size; @@ -1641,7 +1640,6 @@ out_free_ar: #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** * pcpu_lpage_build_unit_map - build unit_map for large page remapping - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_sizep: in/out parameter for dynamic size, -1 for auto * @unit_sizep: out parameter for unit size @@ -1661,13 +1659,14 @@ out_free_ar: * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and * returns the number of units to be allocated. -errno on failure. 
*/ -int __init pcpu_lpage_build_unit_map(size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, +int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; + const size_t static_size = __per_cpu_end - __per_cpu_start; int group_cnt_max = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ @@ -1819,7 +1818,6 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes * @unit_size: unit size in bytes @@ -1850,15 +1848,15 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, +ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; + const size_t static_size = __per_cpu_end - __per_cpu_start; size_t chunk_size = unit_size * nr_units; size_t map_size; unsigned int cpu; @@ -2037,7 +2035,6 @@ EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; ssize_t unit_size; unsigned long delta; unsigned int cpu; @@ -2046,7 +2043,7 @@ void __init setup_per_cpu_areas(void) * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, + unit_size = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE); if (unit_size < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From 1d9d32572163b30be81dbe1409dfa7ea9763d0e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: make @dyn_size mandatory for pcpu_setup_first_chunk() Now that all actual first chunk allocation and copying happen in the first chunk allocators and helpers, there's no reason for pcpu_setup_first_chunk() to try to determine @dyn_size automatically. The only left user is page first chunk allocator. Make it determine dyn_size like other allocators and make @dyn_size mandatory for pcpu_setup_first_chunk(). 
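To see what "determine dyn_size like other allocators" means, here is a
standalone rendering of the pcpu_calc_fc_sizes() convention used in the
diff below: dyn_size == -1 requests auto sizing, and the auto value
absorbs whatever padding page alignment adds. The numbers in main() are
made up.

  #include <stdio.h>

  #define PAGE_SIZE	4096UL
  #define PFN_ALIGN(x)	(((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

  static unsigned long calc_fc_sizes(unsigned long static_size,
  				   unsigned long reserved_size,
  				   long *dyn_sizep)
  {
  	unsigned long size_sum;

  	size_sum = PFN_ALIGN(static_size + reserved_size +
  			     (*dyn_sizep >= 0 ? (unsigned long)*dyn_sizep : 0));
  	if (*dyn_sizep != 0)
  		*dyn_sizep = size_sum - static_size - reserved_size;
  	return size_sum;
  }

  int main(void)
  {
  	long dyn = -1;	/* auto */
  	unsigned long sum = calc_fc_sizes(5000, 8192, &dyn);

  	/* prints: size_sum=16384 dyn=3192 */
  	printf("size_sum=%lu dyn=%ld\n", sum, dyn);
  	return 0;
  }

pcpu_setup_first_chunk() then receives the already-resolved value and no
longer needs a -1 special case of its own.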
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- mm/percpu.c | 39 +++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index be2fc8fb9b6f..0cfdd14d096a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -79,7 +79,7 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t unit_size, + size_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK diff --git a/mm/percpu.c b/mm/percpu.c index e2ac58a39bb2..287f59cc5fb9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1235,7 +1235,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes, 0 for none - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE * @base_addr: mapped address * @unit_map: cpu -> unit map, NULL for sequential mapping @@ -1252,10 +1252,9 @@ EXPORT_SYMBOL_GPL(free_percpu); * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * - * @dyn_size, if non-negative, determines the number of bytes - * available for dynamic allocation in the first chunk. Specifying - * non-negative value makes percpu leave alone the area beyond - * @static_size + @reserved_size + @dyn_size. + * @dyn_size determines the number of bytes available for dynamic + * allocation in the first chunk. The area between @static_size + + * @reserved_size + @dyn_size and @unit_size is unused. * * @unit_size specifies unit size and must be aligned to PAGE_SIZE and * equal to or larger than @static_size + @reserved_size + if @@ -1276,13 +1275,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * percpu access. */ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t unit_size, + size_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map) { static struct vm_struct first_vm; static int smap[2], dmap[2]; - size_t size_sum = static_size + reserved_size + - (dyn_size >= 0 ? 
dyn_size : 0); + size_t size_sum = static_size + reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu, tcpu; int i; @@ -1345,9 +1343,6 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); - if (dyn_size < 0) - dyn_size = pcpu_unit_size - static_size - reserved_size; - first_vm.flags = VM_ALLOC; first_vm.size = pcpu_chunk_size; first_vm.addr = base_addr; @@ -1557,6 +1552,8 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, { static struct vm_struct vm; const size_t static_size = __per_cpu_end - __per_cpu_start; + ssize_t dyn_size = -1; + size_t size_sum, unit_size; char psize_str[16]; int unit_pages; size_t pages_size; @@ -1567,8 +1564,9 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); - unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, - PCPU_MIN_UNIT_SIZE)); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + unit_pages = unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); @@ -1591,12 +1589,12 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * unit_pages << PAGE_SHIFT; + vm.size = nr_cpu_ids * unit_size; vm_area_register_early(&vm, PAGE_SIZE); for_each_possible_cpu(cpu) { - unsigned long unit_addr = (unsigned long)vm.addr + - (cpu * unit_pages << PAGE_SHIFT); + unsigned long unit_addr = + (unsigned long)vm.addr + cpu * unit_size; for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); @@ -1620,11 +1618,12 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, } /* we're ready, commit */ - pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu\n", - unit_pages, psize_str, vm.addr, static_size, reserved_size); + pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", + unit_pages, psize_str, vm.addr, static_size, reserved_size, + dyn_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, - unit_pages << PAGE_SHIFT, vm.addr, NULL); + ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + unit_size, vm.addr, NULL); goto out_free_ar; enomem: -- cgit v1.2.3 From 3cbc85652767c38b252c8de55f9fd180b29e4c0d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: add @align to pcpu_fc_alloc_fn_t pcpu_fc_alloc_fn_t is about to see more interesting usage, add @align parameter. 
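A minimal userspace analogue of the new callback shape, with C11
aligned_alloc() standing in for the bootmem allocator: under the old
(cpu, size) signature the alignment was implicitly equal to the size, so
a page-sized allocation with only, say, 64-byte alignment could not be
expressed.

  #include <stdio.h>
  #include <stdlib.h>

  /* mirrors the updated pcpu_fc_alloc_fn_t */
  typedef void *(*fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align);

  static void *demo_fc_alloc(unsigned int cpu, size_t size, size_t align)
  {
  	(void)cpu;	/* a real callback would allocate node-locally */
  	return aligned_alloc(align, size);	/* size must be a multiple of align */
  }

  int main(void)
  {
  	fc_alloc_fn_t alloc_fn = demo_fc_alloc;
  	void *p = alloc_fn(0, 4096, 64);	/* align < size: new with this patch */

  	printf("allocated at %p\n", p);
  	free(p);
  	return 0;
  }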
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 ++-- include/linux/percpu.h | 3 ++- mm/percpu.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8aad486c688f..660cde133141 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -126,9 +126,9 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, /* * Helpers for first chunk memory allocation */ -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return pcpu_alloc_bootmem(cpu, size, size); + return pcpu_alloc_bootmem(cpu, size, align); } static void __init pcpu_fc_free(void *ptr, size_t size) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 0cfdd14d096a..d385dbcf190b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -71,7 +71,8 @@ extern const char *pcpu_fc_names[PCPU_FC_NR]; extern enum pcpu_fc pcpu_chosen_fc; -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, + size_t align); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); diff --git a/mm/percpu.c b/mm/percpu.c index 287f59cc5fb9..3316e3aac7ee 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1578,7 +1578,7 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, for (i = 0; i < unit_pages; i++) { void *ptr; - ptr = alloc_fn(cpu, PAGE_SIZE); + ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); if (!ptr) { pr_warning("PERCPU: failed to allocate %s page " "for cpu%u\n", psize_str, cpu); @@ -1888,7 +1888,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, goto found; continue; found: - ptr = alloc_fn(cpu, lpage_size); + ptr = alloc_fn(cpu, lpage_size, lpage_size); if (!ptr) { pr_warning("PERCPU: failed to allocate large page " "for cpu%u\n", cpu); -- cgit v1.2.3 From 033e48fb82958053113178264ddb9d5038d5e38b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward Unit map handling will be generalized and extended and used for embedding sparse first chunk and other purposes. Relocate two unit_map related functions upward in preparation. This patch just moves the code without any actual change. 
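The code being moved is worth a second look, since later patches
generalize it. The heart of pcpu_lpage_build_unit_map() is its proximity
grouping pass; a compressed standalone version of that loop, with a
made-up two-node distance table:

  #include <stdio.h>

  #define NR_CPUS		4
  #define LOCAL_DISTANCE	10

  /* toy topology: cpus {0,1} and {2,3} are node-local pairs */
  static int cpu_distance(int a, int b)
  {
  	return (a / 2 == b / 2) ? LOCAL_DISTANCE : 2 * LOCAL_DISTANCE;
  }

  int main(void)
  {
  	int group_map[NR_CPUS] = { 0 };
  	int cpu, tcpu, group;

  	/* same shape as the grouping loop being relocated: place each cpu
  	 * in the first group all of whose members are LOCAL_DISTANCE away
  	 * in both directions */
  	for (cpu = 0; cpu < NR_CPUS; cpu++) {
  		group = 0;
  next_group:
  		for (tcpu = 0; tcpu < cpu; tcpu++) {
  			if (group_map[tcpu] == group &&
  			    (cpu_distance(cpu, tcpu) > LOCAL_DISTANCE ||
  			     cpu_distance(tcpu, cpu) > LOCAL_DISTANCE)) {
  				group++;
  				goto next_group;
  			}
  		}
  		group_map[cpu] = group;
  	}

  	for (cpu = 0; cpu < NR_CPUS; cpu++)
  		printf("cpu%d -> group %d\n", cpu, group_map[cpu]);	/* 0 0 1 1 */
  	return 0;
  }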
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 14 +- mm/percpu.c | 339 +++++++++++++++++++++++++------------------------ 2 files changed, 180 insertions(+), 173 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d385dbcf190b..570fb18de2ba 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -78,6 +78,14 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK +extern int __init pcpu_lpage_build_unit_map( + size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn); +#endif + extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, size_t dyn_size, size_t unit_size, @@ -97,12 +105,6 @@ extern ssize_t __init pcpu_page_first_chunk( #endif #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_build_unit_map( - size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn); - extern ssize_t __init pcpu_lpage_first_chunk( size_t reserved_size, size_t dyn_size, size_t unit_size, size_t lpage_size, diff --git a/mm/percpu.c b/mm/percpu.c index 3316e3aac7ee..2b9c4b2a2fc0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1231,6 +1231,178 @@ void free_percpu(void *ptr) } EXPORT_SYMBOL_GPL(free_percpu); +static inline size_t pcpu_calc_fc_sizes(size_t static_size, + size_t reserved_size, + ssize_t *dyn_sizep) +{ + size_t size_sum; + + size_sum = PFN_ALIGN(static_size + reserved_size + + (*dyn_sizep >= 0 ? *dyn_sizep : 0)); + if (*dyn_sizep != 0) + *dyn_sizep = size_sum - static_size - reserved_size; + + return size_sum; +} + +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK +/** + * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_sizep: in/out parameter for dynamic size, -1 for auto + * @unit_sizep: out parameter for unit size + * @unit_map: unit_map to be filled + * @cpu_distance_fn: callback to determine distance between cpus + * + * This function builds cpu -> unit map and determine other parameters + * considering needed percpu size, large page size and distances + * between CPUs in NUMA. + * + * CPUs which are of LOCAL_DISTANCE both ways are grouped together and + * may share units in the same large page. The returned configuration + * is guaranteed to have CPUs on different nodes on different large + * pages and >=75% usage of allocated virtual address space. + * + * RETURNS: + * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and + * returns the number of units to be allocated. -errno on failure. 
+ */ +int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + static int group_map[NR_CPUS] __initdata; + static int group_cnt[NR_CPUS] __initdata; + const size_t static_size = __per_cpu_end - __per_cpu_start; + int group_cnt_max = 0; + size_t size_sum, min_unit_size, alloc_size; + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ + int last_allocs; + unsigned int cpu, tcpu; + int group, unit; + + /* + * Determine min_unit_size, alloc_size and max_upa such that + * alloc_size is multiple of lpage_size and is the smallest + * which can accomodate 4k aligned segments which are equal to + * or larger than min_unit_size. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + + alloc_size = roundup(min_unit_size, lpage_size); + upa = alloc_size / min_unit_size; + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + upa--; + max_upa = upa; + + /* group cpus according to their proximity */ + for_each_possible_cpu(cpu) { + group = 0; + next_group: + for_each_possible_cpu(tcpu) { + if (cpu == tcpu) + break; + if (group_map[tcpu] == group && + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { + group++; + goto next_group; + } + } + group_map[cpu] = group; + group_cnt[group]++; + group_cnt_max = max(group_cnt_max, group_cnt[group]); + } + + /* + * Expand unit size until address space usage goes over 75% + * and then as much as possible without using more address + * space. + */ + last_allocs = INT_MAX; + for (upa = max_upa; upa; upa--) { + int allocs = 0, wasted = 0; + + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + continue; + + for (group = 0; group_cnt[group]; group++) { + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); + allocs += this_allocs; + wasted += this_allocs * upa - group_cnt[group]; + } + + /* + * Don't accept if wastage is over 25%. The + * greater-than comparison ensures upa==1 always + * passes the following check. 
+ */ + if (wasted > num_possible_cpus() / 3) + continue; + + /* and then don't consume more memory */ + if (allocs > last_allocs) + break; + last_allocs = allocs; + best_upa = upa; + } + *unit_sizep = alloc_size / best_upa; + + /* assign units to cpus accordingly */ + unit = 0; + for (group = 0; group_cnt[group]; group++) { + for_each_possible_cpu(cpu) + if (group_map[cpu] == group) + unit_map[cpu] = unit++; + unit = roundup(unit, best_upa); + } + + return unit; /* unit contains aligned number of units */ +} + +static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, + unsigned int *cpup); + +static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units) +{ + int width = 1, v = nr_units; + char empty_str[] = "--------"; + int upl, lpl; /* units per lpage, lpage per line */ + unsigned int cpu; + int lpage, unit; + + while (v /= 10) + width++; + empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + + upl = max_t(int, lpage_size / unit_size, 1); + lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + + printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, + static_size, reserved_size, dyn_size, unit_size, lpage_size); + + for (lpage = 0, unit = 0; unit < nr_units; unit++) { + if (!(unit % upl)) { + if (!(lpage++ % lpl)) { + printk("\n"); + printk("%spcpu-lpage: ", lvl); + } else + printk("| "); + } + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + printk("%0*d ", width, cpu); + else + printk("%s ", empty_str); + } + printk("\n"); +} +#endif + /** * pcpu_setup_first_chunk - initialize the first percpu chunk * @static_size: the size of static percpu area in bytes @@ -1441,20 +1613,6 @@ static int __init percpu_alloc_setup(char *str) } early_param("percpu_alloc", percpu_alloc_setup); -static inline size_t pcpu_calc_fc_sizes(size_t static_size, - size_t reserved_size, - ssize_t *dyn_sizep) -{ - size_t size_sum; - - size_sum = PFN_ALIGN(static_size + reserved_size + - (*dyn_sizep >= 0 ? *dyn_sizep : 0)); - if (*dyn_sizep != 0) - *dyn_sizep = size_sum - static_size - reserved_size; - - return size_sum; -} - #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** @@ -1637,122 +1795,6 @@ out_free_ar: #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -/** - * pcpu_lpage_build_unit_map - build unit_map for large page remapping - * @reserved_size: the size of reserved percpu area in bytes - * @dyn_sizep: in/out parameter for dynamic size, -1 for auto - * @unit_sizep: out parameter for unit size - * @unit_map: unit_map to be filled - * @cpu_distance_fn: callback to determine distance between cpus - * - * This function builds cpu -> unit map and determine other parameters - * considering needed percpu size, large page size and distances - * between CPUs in NUMA. - * - * CPUs which are of LOCAL_DISTANCE both ways are grouped together and - * may share units in the same large page. The returned configuration - * is guaranteed to have CPUs on different nodes on different large - * pages and >=75% usage of allocated virtual address space. - * - * RETURNS: - * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and - * returns the number of units to be allocated. -errno on failure. 
- */ -int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) -{ - static int group_map[NR_CPUS] __initdata; - static int group_cnt[NR_CPUS] __initdata; - const size_t static_size = __per_cpu_end - __per_cpu_start; - int group_cnt_max = 0; - size_t size_sum, min_unit_size, alloc_size; - int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ - int last_allocs; - unsigned int cpu, tcpu; - int group, unit; - - /* - * Determine min_unit_size, alloc_size and max_upa such that - * alloc_size is multiple of lpage_size and is the smallest - * which can accomodate 4k aligned segments which are equal to - * or larger than min_unit_size. - */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); - min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - - alloc_size = roundup(min_unit_size, lpage_size); - upa = alloc_size / min_unit_size; - while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) - upa--; - max_upa = upa; - - /* group cpus according to their proximity */ - for_each_possible_cpu(cpu) { - group = 0; - next_group: - for_each_possible_cpu(tcpu) { - if (cpu == tcpu) - break; - if (group_map[tcpu] == group && - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { - group++; - goto next_group; - } - } - group_map[cpu] = group; - group_cnt[group]++; - group_cnt_max = max(group_cnt_max, group_cnt[group]); - } - - /* - * Expand unit size until address space usage goes over 75% - * and then as much as possible without using more address - * space. - */ - last_allocs = INT_MAX; - for (upa = max_upa; upa; upa--) { - int allocs = 0, wasted = 0; - - if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) - continue; - - for (group = 0; group_cnt[group]; group++) { - int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); - allocs += this_allocs; - wasted += this_allocs * upa - group_cnt[group]; - } - - /* - * Don't accept if wastage is over 25%. The - * greater-than comparison ensures upa==1 always - * passes the following check. 
- */ - if (wasted > num_possible_cpus() / 3) - continue; - - /* and then don't consume more memory */ - if (allocs > last_allocs) - break; - last_allocs = allocs; - best_upa = upa; - } - *unit_sizep = alloc_size / best_upa; - - /* assign units to cpus accordingly */ - unit = 0; - for (group = 0; group_cnt[group]; group++) { - for_each_possible_cpu(cpu) - if (group_map[cpu] == group) - unit_map[cpu] = unit++; - unit = roundup(unit, best_upa); - } - - return unit; /* unit contains aligned number of units */ -} - struct pcpul_ent { void *ptr; void *map_addr; @@ -1778,43 +1820,6 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, return false; } -static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units) -{ - int width = 1, v = nr_units; - char empty_str[] = "--------"; - int upl, lpl; /* units per lpage, lpage per line */ - unsigned int cpu; - int lpage, unit; - - while (v /= 10) - width++; - empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; - - upl = max_t(int, lpage_size / unit_size, 1); - lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); - - printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, - static_size, reserved_size, dyn_size, unit_size, lpage_size); - - for (lpage = 0, unit = 0; unit < nr_units; unit++) { - if (!(unit % upl)) { - if (!(lpage++ % lpl)) { - printk("\n"); - printk("%spcpu-lpage: ", lvl); - } else - printk("| "); - } - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) - printk("%0*d ", width, cpu); - else - printk("%s ", empty_str); - } - printk("\n"); -} - /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page * @reserved_size: the size of reserved percpu area in bytes -- cgit v1.2.3 From fd1e8a1fe2b54df6c185b4fa65f181f50b9c4d4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: introduce pcpu_alloc_info and pcpu_group_info Till now, non-linear cpu->unit map was expressed using an integer array which maps each cpu to a unit and used only by lpage allocator. Although how many units have been placed in a single contiguos area (group) is known while building unit_map, the information is lost when the result is recorded into the unit_map array. For lpage allocator, as all allocations are done by lpages and whether two adjacent lpages are in the same group or not is irrelevant, this didn't cause any problem. Non-linear cpu->unit mapping will be used for sparse embedding and this grouping information is necessary for that. This patch introduces pcpu_alloc_info which contains all the information necessary for initializing percpu allocator. pcpu_alloc_info contains array of pcpu_group_info which describes how units are grouped and mapped to cpus. pcpu_group_info also has base_offset field to specify its offset from the chunk's base address. pcpu_build_alloc_info() initializes this field as if all groups are allocated back-to-back as is currently done but this will be used to sparsely place groups. pcpu_alloc_info is a rather complex data structure which contains a flexible array which in turn points to nested cpu_map arrays. * pcpu_alloc_alloc_info() and pcpu_free_alloc_info() are provided to help dealing with pcpu_alloc_info. * pcpu_lpage_build_unit_map() is updated to build pcpu_alloc_info, generalized and renamed to pcpu_build_alloc_info(). 
@cpu_distance_fn may be NULL indicating that all cpus are of LOCAL_DISTANCE. * pcpul_lpage_dump_cfg() is updated to process pcpu_alloc_info, generalized and renamed to pcpu_dump_alloc_info(). It now also prints which group each alloc unit belongs to. * pcpu_setup_first_chunk() now takes pcpu_alloc_info instead of the separate parameters. All first chunk allocators are updated to use pcpu_build_alloc_info() to build alloc_info and call pcpu_setup_first_chunk() with it. This has the side effect of packing units for sparse possible cpus. ie. if cpus 0, 2 and 4 are possible, they'll be assigned unit 0, 1 and 2 instead of 0, 2 and 4. * x86 setup_pcpu_lpage() is updated to deal with alloc_info. * sparc64 setup_per_cpu_areas() is updated to build alloc_info. Although the changes made by this patch are pretty pervasive, it doesn't cause any behavior difference other than packing of sparse cpus. It mostly changes how information is passed among initialization functions and makes room for more flexibility. Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 24 +- arch/x86/kernel/setup_percpu.c | 38 ++- include/linux/percpu.h | 42 +++- mm/percpu.c | 529 +++++++++++++++++++++++++---------------- 4 files changed, 389 insertions(+), 244 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9856d866b77b..a42a4a744d14 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1475,17 +1475,29 @@ static void __init pcpu_map_range(unsigned long start, unsigned long end, void __init setup_per_cpu_areas(void) { - size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; static struct vm_struct vm; + struct pcpu_alloc_info *ai; unsigned long delta, cpu; size_t size_sum, pcpu_unit_size; size_t ptrs_size; void **ptrs; - size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); + + ai->static_size = __per_cpu_end - __per_cpu_start; + ai->reserved_size = PERCPU_MODULE_RESERVE; + + size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size + PERCPU_DYNAMIC_RESERVE); - dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; + ai->dyn_size = size_sum - ai->static_size - ai->reserved_size; + ai->unit_size = PCPU_CHUNK_SIZE; + ai->atom_size = PCPU_CHUNK_SIZE; + ai->alloc_size = PCPU_CHUNK_SIZE; + ai->groups[0].nr_units = nr_cpu_ids; + + for_each_possible_cpu(cpu) + ai->groups[0].cpu_map[cpu] = cpu; ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0])); ptrs = alloc_bootmem(ptrs_size); @@ -1497,7 +1509,7 @@ void __init setup_per_cpu_areas(void) free_bootmem(__pa(ptrs[cpu] + size_sum), PCPU_CHUNK_SIZE - size_sum); - memcpy(ptrs[cpu], __per_cpu_load, static_size); + memcpy(ptrs[cpu], __per_cpu_load, ai->static_size); } /* allocate address and map */ @@ -1514,9 +1526,7 @@ void __init setup_per_cpu_areas(void) pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(static_size, - PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); + pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); free_bootmem(__pa(ptrs), ptrs_size); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 660cde133141..db5f9c49fec5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -161,9 +161,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; - 
size_t unit_map_size, unit_size; - int *unit_map; - int nr_units; + struct pcpu_alloc_info *ai; ssize_t ret; /* on non-NUMA, embedding is better */ @@ -177,26 +175,22 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } /* allocate and build unit_map */ - unit_map_size = nr_cpu_ids * sizeof(int); - unit_map = alloc_bootmem_nopanic(unit_map_size); - if (!unit_map) { - pr_warning("PERCPU: failed to allocate unit_map\n"); - return -ENOMEM; + ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + PMD_SIZE, pcpu_lpage_cpu_distance); + if (IS_ERR(ai)) { + pr_warning("PERCPU: failed to build unit_map (%ld)\n", + PTR_ERR(ai)); + return PTR_ERR(ai); } - ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, - &dyn_size, &unit_size, PMD_SIZE, - unit_map, pcpu_lpage_cpu_distance); - if (ret < 0) { - pr_warning("PERCPU: failed to build unit_map\n"); - goto out_free; - } - nr_units = ret; - /* do the parameters look okay? */ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = nr_units * unit_size; + size_t tot_size = 0; + int group; + + for (group = 0; group < ai->nr_groups; group++) + tot_size += ai->unit_size * ai->groups[group].nr_units; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { @@ -207,12 +201,10 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } } - ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - unit_size, PMD_SIZE, unit_map, nr_units, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, + pcpul_map); out_free: - if (ret < 0) - free_bootmem(__pa(unit_map), unit_map_size); + pcpu_free_alloc_info(ai); return ret; } #else diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 570fb18de2ba..77b86be8ce4f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,6 +59,25 @@ extern void *pcpu_base_addr; extern const int *pcpu_unit_map; +struct pcpu_group_info { + int nr_units; /* aligned # of units */ + unsigned long base_offset; /* base address offset */ + unsigned int *cpu_map; /* unit->cpu map, empty + * entries contain NR_CPUS */ +}; + +struct pcpu_alloc_info { + size_t static_size; + size_t reserved_size; + size_t dyn_size; + size_t unit_size; + size_t atom_size; + size_t alloc_size; + size_t __ai_size; /* internal, don't use */ + int nr_groups; /* 0 if grouping unnecessary */ + struct pcpu_group_info groups[]; +}; + enum pcpu_fc { PCPU_FC_AUTO, PCPU_FC_EMBED, @@ -78,18 +97,17 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_build_unit_map( - size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, +extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, + int nr_units); +extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai); + +extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( + size_t reserved_size, ssize_t dyn_size, + size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); -#endif -extern size_t __init pcpu_setup_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - void *base_addr, const int *unit_map); +extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr); #ifdef 
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( @@ -106,9 +124,7 @@ extern ssize_t __init pcpu_page_first_chunk( #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern ssize_t __init pcpu_lpage_first_chunk( - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units, + const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 2b9c4b2a2fc0..99f7fa682722 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -58,6 +58,7 @@ #include #include +#include #include #include #include @@ -1245,53 +1246,108 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, return size_sum; } -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** - * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * pcpu_alloc_alloc_info - allocate percpu allocation info + * @nr_groups: the number of groups + * @nr_units: the number of units + * + * Allocate ai which is large enough for @nr_groups groups containing + * @nr_units units. The returned ai's groups[0].cpu_map points to the + * cpu_map array which is long enough for @nr_units and filled with + * NR_CPUS. It's the caller's responsibility to initialize cpu_map + * pointer of other groups. + * + * RETURNS: + * Pointer to the allocated pcpu_alloc_info on success, NULL on + * failure. + */ +struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, + int nr_units) +{ + struct pcpu_alloc_info *ai; + size_t base_size, ai_size; + void *ptr; + int unit; + + base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]), + __alignof__(ai->groups[0].cpu_map[0])); + ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); + + ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size)); + if (!ptr) + return NULL; + ai = ptr; + ptr += base_size; + + ai->groups[0].cpu_map = ptr; + + for (unit = 0; unit < nr_units; unit++) + ai->groups[0].cpu_map[unit] = NR_CPUS; + + ai->nr_groups = nr_groups; + ai->__ai_size = PFN_ALIGN(ai_size); + + return ai; +} + +/** + * pcpu_free_alloc_info - free percpu allocation info + * @ai: pcpu_alloc_info to free + * + * Free @ai which was allocated by pcpu_alloc_alloc_info(). + */ +void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) +{ + free_bootmem(__pa(ai), ai->__ai_size); +} + +/** + * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes - * @dyn_sizep: in/out parameter for dynamic size, -1 for auto - * @unit_sizep: out parameter for unit size - * @unit_map: unit_map to be filled - * @cpu_distance_fn: callback to determine distance between cpus + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @atom_size: allocation atom size + * @cpu_distance_fn: callback to determine distance between cpus, optional * - * This function builds cpu -> unit map and determine other parameters - * considering needed percpu size, large page size and distances - * between CPUs in NUMA. + * This function determines grouping of units, their mappings to cpus + * and other parameters considering needed percpu size, allocation + * atom size and distances between CPUs. * - * CPUs which are of LOCAL_DISTANCE both ways are grouped together and - * may share units in the same large page. The returned configuration - * is guaranteed to have CPUs on different nodes on different large - * pages and >=75% usage of allocated virtual address space. 
+ * Groups are always multiples of atom size and CPUs which are of + * LOCAL_DISTANCE both ways are grouped together and share space for + * units in the same group. The returned configuration is guaranteed + * to have CPUs on different nodes on different groups and >=75% usage + * of allocated virtual address space. * * RETURNS: - * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and - * returns the number of units to be allocated. -errno on failure. + * On success, pointer to the new allocation_info is returned. On + * failure, ERR_PTR value is returned. */ -int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +struct pcpu_alloc_info * __init pcpu_build_alloc_info( + size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; const size_t static_size = __per_cpu_end - __per_cpu_start; - int group_cnt_max = 0; + int group_cnt_max = 0, nr_groups = 1, nr_units = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ - int last_allocs; + int last_allocs, group, unit; unsigned int cpu, tcpu; - int group, unit; + struct pcpu_alloc_info *ai; + unsigned int *cpu_map; /* * Determine min_unit_size, alloc_size and max_upa such that - * alloc_size is multiple of lpage_size and is the smallest + * alloc_size is multiple of atom_size and is the smallest * which can accommodate 4k aligned segments which are equal to * or larger than min_unit_size. */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - alloc_size = roundup(min_unit_size, lpage_size); + alloc_size = roundup(min_unit_size, atom_size); upa = alloc_size / min_unit_size; while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) upa--; @@ -1304,10 +1360,11 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, for_each_possible_cpu(tcpu) { if (cpu == tcpu) break; - if (group_map[tcpu] == group && + if (group_map[tcpu] == group && cpu_distance_fn && (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { group++; + nr_groups = max(nr_groups, group + 1); goto next_group; } } @@ -1328,7 +1385,7 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) continue; - for (group = 0; group_cnt[group]; group++) { + for (group = 0; group < nr_groups; group++) { int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); allocs += this_allocs; wasted += this_allocs * upa - group_cnt[group]; @@ -1348,75 +1405,122 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, last_allocs = allocs; best_upa = upa; } - *unit_sizep = alloc_size / best_upa; + upa = best_upa; + + /* allocate and fill alloc_info */ + for (group = 0; group < nr_groups; group++) + nr_units += roundup(group_cnt[group], upa); + + ai = pcpu_alloc_alloc_info(nr_groups, nr_units); + if (!ai) + return ERR_PTR(-ENOMEM); + cpu_map = ai->groups[0].cpu_map; + + for (group = 0; group < nr_groups; group++) { + ai->groups[group].cpu_map = cpu_map; + cpu_map += roundup(group_cnt[group], upa); + } + + ai->static_size = static_size; +
ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = alloc_size / upa; + ai->atom_size = atom_size; + ai->alloc_size = alloc_size; + + for (group = 0, unit = 0; group_cnt[group]; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + + /* + * Initialize base_offset as if all groups are located + * back-to-back. The caller should update this to + * reflect actual allocation. + */ + gi->base_offset = unit * ai->unit_size; - /* assign units to cpus accordingly */ - unit = 0; - for (group = 0; group_cnt[group]; group++) { for_each_possible_cpu(cpu) if (group_map[cpu] == group) - unit_map[cpu] = unit++; - unit = roundup(unit, best_upa); + gi->cpu_map[gi->nr_units++] = cpu; + gi->nr_units = roundup(gi->nr_units, upa); + unit += gi->nr_units; } + BUG_ON(unit != nr_units); - return unit; /* unit contains aligned number of units */ + return ai; } -static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, - unsigned int *cpup); - -static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units) +/** + * pcpu_dump_alloc_info - print out information about pcpu_alloc_info + * @lvl: loglevel + * @ai: allocation info to dump + * + * Print out information about @ai using loglevel @lvl. + */ +static void pcpu_dump_alloc_info(const char *lvl, + const struct pcpu_alloc_info *ai) { - int width = 1, v = nr_units; + int group_width = 1, cpu_width = 1, width; char empty_str[] = "--------"; - int upl, lpl; /* units per lpage, lpage per line */ - unsigned int cpu; - int lpage, unit; + int alloc = 0, alloc_end = 0; + int group, v; + int upa, apl; /* units per alloc, allocs per line */ + + v = ai->nr_groups; + while (v /= 10) + group_width++; + v = num_possible_cpus(); while (v /= 10) - width++; - empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + cpu_width++; + empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0'; - upl = max_t(int, lpage_size / unit_size, 1); - lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + upa = ai->alloc_size / ai->unit_size; + width = upa * (cpu_width + 1) + group_width + 3; + apl = rounddown_pow_of_two(max(60 / width, 1)); - printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, - static_size, reserved_size, dyn_size, unit_size, lpage_size); + printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu", + lvl, ai->static_size, ai->reserved_size, ai->dyn_size, + ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size); - for (lpage = 0, unit = 0; unit < nr_units; unit++) { - if (!(unit % upl)) { - if (!(lpage++ % lpl)) { + for (group = 0; group < ai->nr_groups; group++) { + const struct pcpu_group_info *gi = &ai->groups[group]; + int unit = 0, unit_end = 0; + + BUG_ON(gi->nr_units % upa); + for (alloc_end += gi->nr_units / upa; + alloc < alloc_end; alloc++) { + if (!(alloc % apl)) { printk("\n"); - printk("%spcpu-lpage: ", lvl); - } else - printk("| "); + printk("%spcpu-alloc: ", lvl); + } + printk("[%0*d] ", group_width, group); + + for (unit_end += upa; unit < unit_end; unit++) + if (gi->cpu_map[unit] != NR_CPUS) + printk("%0*d ", cpu_width, + gi->cpu_map[unit]); + else + printk("%s ", empty_str); } - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) - printk("%0*d ", width, cpu); - else - printk("%s ", empty_str); } printk("\n"); } -#endif /** * pcpu_setup_first_chunk - initialize the first percpu chunk - * @static_size: the size of static percpu area in 
bytes - * @reserved_size: the size of reserved percpu area in bytes, 0 for none - * @dyn_size: free size for dynamic allocation in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE + * @ai: pcpu_alloc_info describing how the percpu area is shaped * @base_addr: mapped address - * @unit_map: cpu -> unit map, NULL for sequential mapping * * Initialize the first percpu chunk which contains the kernel static * percpu area. This function is to be called from arch percpu area * setup path. * - * @reserved_size, if non-zero, specifies the amount of bytes to + * @ai contains all information necessary to initialize the first + * chunk and prime the dynamic percpu allocator. + * + * @ai->static_size is the size of static percpu area. + * + * @ai->reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved * percpu allocation. This is primarily used to serve module percpu @@ -1424,13 +1528,26 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * - * @dyn_size determines the number of bytes available for dynamic - * allocation in the first chunk. The area between @static_size + - * @reserved_size + @dyn_size and @unit_size is unused. + * @ai->dyn_size determines the number of bytes available for dynamic + * allocation in the first chunk. The area between @ai->static_size + + * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused. * - * @unit_size specifies unit size and must be aligned to PAGE_SIZE and - * equal to or larger than @static_size + @reserved_size + if - * non-negative, @dyn_size. + * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE + * and equal to or larger than @ai->static_size + @ai->reserved_size + + * @ai->dyn_size. + * + * @ai->atom_size is the allocation atom size and used as alignment + * for vm areas. + * + * @ai->alloc_size is the allocation size and always multiple of + * @ai->atom_size. This is larger than @ai->atom_size if + * @ai->unit_size is larger than @ai->atom_size. + * + * @ai->nr_groups and @ai->groups describe virtual memory layout of + * percpu areas. Units which should be colocated are put into the + * same group. Dynamic VM areas will be allocated according to these + * groupings. If @ai->nr_groups is zero, a single group containing + * all units is assumed. * * The caller should have mapped the first chunk at @base_addr and * copied static data to each unit. @@ -1446,70 +1563,63 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access.
*/ -size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - void *base_addr, const int *unit_map) +size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; - size_t size_sum = static_size + reserved_size + dyn_size; + size_t dyn_size = ai->dyn_size; + size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; - unsigned int cpu, tcpu; - int i; + unsigned int cpu; + int *unit_map; + int group, unit, i; /* sanity checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); - BUG_ON(!static_size); + BUG_ON(ai->nr_groups <= 0); + BUG_ON(!ai->static_size); BUG_ON(!base_addr); - BUG_ON(unit_size < size_sum); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + BUG_ON(ai->unit_size < size_sum); + BUG_ON(ai->unit_size & ~PAGE_MASK); + BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); + + pcpu_dump_alloc_info(KERN_DEBUG, ai); /* determine number of units and verify and initialize pcpu_unit_map */ - if (unit_map) { - int first_unit = INT_MAX, last_unit = INT_MIN; - - for_each_possible_cpu(cpu) { - int unit = unit_map[cpu]; - - BUG_ON(unit < 0); - for_each_possible_cpu(tcpu) { - if (tcpu == cpu) - break; - /* the mapping should be one-to-one */ - BUG_ON(unit_map[tcpu] == unit); - } + unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); - if (unit < first_unit) { - pcpu_first_unit_cpu = cpu; - first_unit = unit; - } - if (unit > last_unit) { - pcpu_last_unit_cpu = cpu; - last_unit = unit; - } - } - pcpu_nr_units = last_unit + 1; - pcpu_unit_map = unit_map; - } else { - int *identity_map; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + unit_map[cpu] = NR_CPUS; + pcpu_first_unit_cpu = NR_CPUS; - /* #units == #cpus, identity mapped */ - identity_map = alloc_bootmem(nr_cpu_ids * - sizeof(identity_map[0])); + for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { + const struct pcpu_group_info *gi = &ai->groups[group]; - for_each_possible_cpu(cpu) - identity_map[cpu] = cpu; + for (i = 0; i < gi->nr_units; i++) { + cpu = gi->cpu_map[i]; + if (cpu == NR_CPUS) + continue; - pcpu_first_unit_cpu = 0; - pcpu_last_unit_cpu = pcpu_nr_units - 1; - pcpu_nr_units = nr_cpu_ids; - pcpu_unit_map = identity_map; + BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu)); + BUG_ON(unit_map[cpu] != NR_CPUS); + + unit_map[cpu] = unit + i; + if (pcpu_first_unit_cpu == NR_CPUS) + pcpu_first_unit_cpu = cpu; + } } + pcpu_last_unit_cpu = cpu; + pcpu_nr_units = unit; + + for_each_possible_cpu(cpu) + BUG_ON(unit_map[cpu] == NR_CPUS); + + pcpu_unit_map = unit_map; /* determine basic parameters */ - pcpu_unit_pages = unit_size >> PAGE_SHIFT; + pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + @@ -1543,17 +1653,17 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, schunk->immutable = true; bitmap_fill(schunk->populated, pcpu_unit_pages); - if (reserved_size) { - schunk->free_size = reserved_size; + if (ai->reserved_size) { + schunk->free_size = ai->reserved_size; pcpu_reserved_chunk = schunk; - pcpu_reserved_chunk_limit = static_size + reserved_size; + pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size; } else { schunk->free_size = dyn_size; dyn_size = 0; /* dynamic 
area covered */ } schunk->contig_hint = schunk->free_size; - schunk->map[schunk->map_used++] = -static_size; + schunk->map[schunk->map_used++] = -ai->static_size; if (schunk->free_size) schunk->map[schunk->map_used++] = schunk->free_size; @@ -1643,44 +1753,47 @@ early_param("percpu_alloc", percpu_alloc_setup); */ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { - const size_t static_size = __per_cpu_end - __per_cpu_start; - size_t size_sum, unit_size, chunk_size; + struct pcpu_alloc_info *ai; + size_t size_sum, chunk_size; void *base; - unsigned int cpu; + int unit; + ssize_t ret; - /* determine parameters and allocate */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); + if (IS_ERR(ai)) + return PTR_ERR(ai); + BUG_ON(ai->nr_groups != 1); + BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); - unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - chunk_size = unit_size * nr_cpu_ids; + size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; + chunk_size = ai->unit_size * num_possible_cpus(); base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); - return -ENOMEM; + ret = -ENOMEM; + goto out_free_ai; } /* return the leftover and copy */ - for (cpu = 0; cpu < nr_cpu_ids; cpu++) { - void *ptr = base + cpu * unit_size; - - if (cpu_possible(cpu)) { - free_bootmem(__pa(ptr + size_sum), - unit_size - size_sum); - memcpy(ptr, __per_cpu_load, static_size); - } else - free_bootmem(__pa(ptr), unit_size); + for (unit = 0; unit < num_possible_cpus(); unit++) { + void *ptr = base + unit * ai->unit_size; + + free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); + memcpy(ptr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", - PFN_DOWN(size_sum), base, static_size, reserved_size, dyn_size, - unit_size); + PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, + ai->dyn_size, ai->unit_size); - return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, base, NULL); + ret = pcpu_setup_first_chunk(ai, base); +out_free_ai: + pcpu_free_alloc_info(ai); + return ret; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || !CONFIG_HAVE_SETUP_PER_CPU_AREA */ @@ -1709,31 +1822,34 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; - const size_t static_size = __per_cpu_end - __per_cpu_start; - ssize_t dyn_size = -1; - size_t size_sum, unit_size; + struct pcpu_alloc_info *ai; char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; - unsigned int cpu; - int i, j; + int unit, i, j; ssize_t ret; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); - unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - unit_pages = unit_size >> PAGE_SHIFT; + ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL); + if (IS_ERR(ai)) + return PTR_ERR(ai); + BUG_ON(ai->nr_groups != 1); + BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); + + unit_pages = ai->unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); + pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * 
+ sizeof(pages[0])); pages = alloc_bootmem(pages_size); /* allocate pages */ j = 0; - for_each_possible_cpu(cpu) + for (unit = 0; unit < num_possible_cpus(); unit++) for (i = 0; i < unit_pages; i++) { + unsigned int cpu = ai->groups[0].cpu_map[unit]; void *ptr; ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); @@ -1747,18 +1863,18 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * unit_size; + vm.size = num_possible_cpus() * ai->unit_size; vm_area_register_early(&vm, PAGE_SIZE); - for_each_possible_cpu(cpu) { + for (unit = 0; unit < num_possible_cpus(); unit++) { unsigned long unit_addr = - (unsigned long)vm.addr + cpu * unit_size; + (unsigned long)vm.addr + unit * ai->unit_size; for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], + ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], unit_pages); if (ret < 0) panic("failed to map percpu area, err=%zd\n", ret); @@ -1772,16 +1888,15 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, */ /* copy static data */ - memcpy((void *)unit_addr, __per_cpu_load, static_size); + memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", - unit_pages, psize_str, vm.addr, static_size, reserved_size, - dyn_size); + unit_pages, psize_str, vm.addr, ai->static_size, + ai->reserved_size, ai->dyn_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: @@ -1790,6 +1905,7 @@ enomem: ret = -ENOMEM; out_free_ar: free_bootmem(__pa(pages), pages_size); + pcpu_free_alloc_info(ai); return ret; } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ @@ -1805,38 +1921,50 @@ static size_t pcpul_lpage_size; static int pcpul_nr_lpages; static struct pcpul_ent *pcpul_map; -static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, +static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, unsigned int *cpup) { - unsigned int cpu; + int group, cunit; - for_each_possible_cpu(cpu) - if (unit_map[cpu] == unit) { + for (group = 0, cunit = 0; group < ai->nr_groups; group++) { + const struct pcpu_group_info *gi = &ai->groups[group]; + + if (unit < cunit + gi->nr_units) { if (cpup) - *cpup = cpu; + *cpup = gi->cpu_map[unit - cunit]; return true; } + cunit += gi->nr_units; + } return false; } +static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) +{ + int group, unit, i; + + for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { + const struct pcpu_group_info *gi = &ai->groups[group]; + + for (i = 0; i < gi->nr_units; i++) + if (gi->cpu_map[i] == cpu) + return unit + i; + } + BUG(); +} + /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @reserved_size: the size of reserved percpu area in bytes - * @dyn_size: free size for dynamic allocation in bytes - * @unit_size: unit size in bytes - * @lpage_size: the size of a large page - * @unit_map: cpu -> unit mapping - * @nr_units: the number of units + * @ai: pcpu_alloc_info * @alloc_fn: function to allocate percpu lpage, always called with lpage_size * @free_fn: function to free percpu memory, @size <= lpage_size * @map_fn: function to map percpu lpage, always called 
with lpage_size * * This allocator uses large page to build and map the first chunk. - * Unlike other helpers, the caller should always specify @dyn_size - * and @unit_size. These parameters along with @unit_map and - * @nr_units can be determined using pcpu_lpage_build_unit_map(). - * This two stage initialization is to allow arch code to evaluate the + * Unlike other helpers, the caller should provide fully initialized + * @ai. This can be done using pcpu_build_alloc_info(). This two + * stage initialization is to allow arch code to evaluate the * parameters before committing to it. * * Large pages are allocated as directed by @unit_map and other @@ -1852,27 +1980,26 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units, +ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; - const size_t static_size = __per_cpu_end - __per_cpu_start; - size_t chunk_size = unit_size * nr_units; - size_t map_size; + const size_t lpage_size = ai->atom_size; + size_t chunk_size, map_size; unsigned int cpu; ssize_t ret; - int i, j, unit; + int i, j, unit, nr_units; - pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, - unit_size, lpage_size, unit_map, nr_units); + nr_units = 0; + for (i = 0; i < ai->nr_groups; i++) + nr_units += ai->groups[i].nr_units; + chunk_size = ai->unit_size * nr_units; BUG_ON(chunk_size % lpage_size); - pcpul_size = static_size + reserved_size + dyn_size; + pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; pcpul_lpage_size = lpage_size; pcpul_nr_lpages = chunk_size / lpage_size; @@ -1883,13 +2010,13 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* allocate all pages */ for (i = 0; i < pcpul_nr_lpages; i++) { size_t offset = i * lpage_size; - int first_unit = offset / unit_size; - int last_unit = (offset + lpage_size - 1) / unit_size; + int first_unit = offset / ai->unit_size; + int last_unit = (offset + lpage_size - 1) / ai->unit_size; void *ptr; /* find out which cpu is mapped to this unit */ for (unit = first_unit; unit <= last_unit; unit++) - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + if (pcpul_unit_to_cpu(unit, ai, &cpu)) goto found; continue; found: @@ -1905,12 +2032,12 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* return unused holes */ for (unit = 0; unit < nr_units; unit++) { - size_t start = unit * unit_size; - size_t end = start + unit_size; + size_t start = unit * ai->unit_size; + size_t end = start + ai->unit_size; size_t off, next; /* don't free used part of occupied unit */ - if (pcpul_unit_to_cpu(unit, unit_map, NULL)) + if (pcpul_unit_to_cpu(unit, ai, NULL)) start += pcpul_size; /* unit can span more than one page, punch the holes */ @@ -1925,7 +2052,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* allocate address, map and copy */ vm.flags = VM_ALLOC; vm.size = chunk_size; - vm_area_register_early(&vm, unit_size); + vm_area_register_early(&vm, ai->unit_size); for (i = 0; i < pcpul_nr_lpages; i++) { if (!pcpul_map[i].ptr) @@ -1935,15 +2062,15 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t 
dyn_size, } for_each_possible_cpu(cpu) - memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, - static_size); + memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, + __per_cpu_load, ai->static_size); /* we're ready, commit */ pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", - vm.addr, static_size, reserved_size, dyn_size, unit_size); + vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, + ai->unit_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, vm.addr, unit_map); + ret = pcpu_setup_first_chunk(ai, vm.addr); /* * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped -- cgit v1.2.3 From fb435d5233f8b6f9b93c11d6304d8e98fed03234 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: add pcpu_unit_offsets[] Currently units are mapped sequentially into address space. This patch adds pcpu_unit_offsets[] which allows units to be mapped to arbitrary offsets from the chunk base address. This is necessary to allow sparse embedding which might need to allocate address ranges and memory areas which aren't aligned to unit size but to allocation atom size (page or large page size). This also simplifies things a bit by removing the need to calculate offset from unit number. With this change, there's no need for the arch code to know pcpu_unit_size. Update pcpu_setup_first_chunk() and first chunk allocators to return a regular 0 or -errno return code instead of unit size or -errno. Signed-off-by: Tejun Heo Cc: David S. Miller --- arch/sparc/kernel/smp_64.c | 12 +++--- arch/x86/kernel/setup_percpu.c | 51 ++++++++++------------- include/linux/percpu.h | 16 ++++--- mm/percpu.c | 95 +++++++++++++++++++++--------------------- 4 files changed, 84 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a42a4a744d14..b03fd362c629 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1478,9 +1478,10 @@ void __init setup_per_cpu_areas(void) static struct vm_struct vm; struct pcpu_alloc_info *ai; unsigned long delta, cpu; - size_t size_sum, pcpu_unit_size; + size_t size_sum; size_t ptrs_size; void **ptrs; + int rc; ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); @@ -1526,14 +1527,15 @@ void __init setup_per_cpu_areas(void) pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); + if (rc) + panic("failed to setup percpu first chunk (%d)", rc); free_bootmem(__pa(ptrs), ptrs_size); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) { - __per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; - } + for_each_possible_cpu(cpu) + __per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; /* Setup %g5 for the boot cpu.
*/ __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index db5f9c49fec5..9becc5d4b518 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,12 +157,12 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; struct pcpu_alloc_info *ai; - ssize_t ret; + int rc; /* on non-NUMA, embedding is better */ if (!chosen && !pcpu_need_numa()) @@ -196,19 +196,18 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - ret = -EINVAL; + rc = -EINVAL; goto out_free; } } - ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, - pcpul_map); + rc = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: pcpu_free_alloc_info(ai); - return ret; + return rc; } #else -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -222,7 +221,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. */ -static ssize_t __init setup_pcpu_embed(bool chosen) +static int __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -250,7 +249,7 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(void) +static int __init setup_pcpu_page(void) { return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, @@ -274,8 +273,7 @@ void __init setup_per_cpu_areas(void) { unsigned int cpu; unsigned long delta; - size_t pcpu_unit_size; - ssize_t ret; + int rc; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -285,36 +283,33 @@ void __init setup_per_cpu_areas(void) * of large page mappings. Please read comments on top of * each allocator for details. 
*/ - ret = -EINVAL; + rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(true); + rc = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(true); + rc = setup_pcpu_embed(true); - if (ret < 0) - pr_warning("PERCPU: %s allocator failed (%zd), " + if (rc < 0) + pr_warning("PERCPU: %s allocator failed (%d), " "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], ret); + pcpu_fc_names[pcpu_chosen_fc], rc); } } else { - ret = setup_pcpu_lpage(false); - if (ret < 0) - ret = setup_pcpu_embed(false); + rc = setup_pcpu_lpage(false); + if (rc < 0) + rc = setup_pcpu_embed(false); } - if (ret < 0) - ret = setup_pcpu_page(); - if (ret < 0) - panic("cannot initialize percpu area (err=%zd)", ret); - - pcpu_unit_size = ret; + if (rc < 0) + rc = setup_pcpu_page(); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = - delta + pcpu_unit_map[cpu] * pcpu_unit_size; + per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 77b86be8ce4f..a7ec840f596c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -57,7 +57,7 @@ #endif extern void *pcpu_base_addr; -extern const int *pcpu_unit_map; +extern const unsigned long *pcpu_unit_offsets; struct pcpu_group_info { int nr_units; /* aligned # of units */ @@ -106,25 +106,23 @@ extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); -extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, - void *base_addr); +extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK -extern ssize_t __init pcpu_embed_first_chunk( - size_t reserved_size, ssize_t dyn_size); +extern int __init pcpu_embed_first_chunk(size_t reserved_size, + ssize_t dyn_size); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK -extern ssize_t __init pcpu_page_first_chunk( - size_t reserved_size, +extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern ssize_t __init pcpu_lpage_first_chunk( - const struct pcpu_alloc_info *ai, +extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 99f7fa682722..653b02c40200 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -117,8 +117,8 @@ static unsigned int pcpu_last_unit_cpu __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* cpu -> unit map */ -const int *pcpu_unit_map __read_mostly; +static const int *pcpu_unit_map __read_mostly; /* cpu -> unit */ +const unsigned long *pcpu_unit_offsets __read_mostly; /* cpu -> unit offset */ /* * The first chunk which always exists. 
Note that unlike other @@ -196,8 +196,8 @@ static int pcpu_page_idx(unsigned int cpu, int page_idx) static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, unsigned int cpu, int page_idx) { - return (unsigned long)chunk->vm->addr + - (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); + return (unsigned long)chunk->vm->addr + pcpu_unit_offsets[cpu] + + (page_idx << PAGE_SHIFT); } static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, @@ -341,7 +341,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * space. Note that any possible cpu id can be used here, so * there's no need to worry about preemption or cpu hotplug. */ - addr += pcpu_unit_map[smp_processor_id()] * pcpu_unit_size; + addr += pcpu_unit_offsets[smp_processor_id()]; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } @@ -1560,17 +1560,17 @@ static void pcpu_dump_alloc_info(const char *lvl, * and available for dynamic allocation like any other chunks. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access. + * 0 on success, -errno on failure. */ -size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, - void *base_addr) +int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; size_t dyn_size = ai->dyn_size; size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; + unsigned long *unit_off; unsigned int cpu; int *unit_map; int group, unit, i; @@ -1587,8 +1587,9 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_dump_alloc_info(KERN_DEBUG, ai); - /* determine number of units and verify and initialize pcpu_unit_map */ + /* determine number of units and initialize unit_map and base */ unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); + unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0])); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = NR_CPUS; @@ -1606,6 +1607,8 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, BUG_ON(unit_map[cpu] != NR_CPUS); unit_map[cpu] = unit + i; + unit_off[cpu] = gi->base_offset + i * ai->unit_size; + if (pcpu_first_unit_cpu == NR_CPUS) pcpu_first_unit_cpu = cpu; } @@ -1617,6 +1620,7 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, BUG_ON(unit_map[cpu] == NR_CPUS); pcpu_unit_map = unit_map; + pcpu_unit_offsets = unit_off; /* determine basic parameters */ pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; @@ -1688,7 +1692,7 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* we're done */ pcpu_base_addr = schunk->vm->addr; - return pcpu_unit_size; + return 0; } const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { @@ -1748,16 +1752,15 @@ early_param("percpu_alloc", percpu_alloc_setup); * size, the leftover is returned to the bootmem allocator. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. 
*/ -ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) +int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { struct pcpu_alloc_info *ai; size_t size_sum, chunk_size; void *base; int unit; - ssize_t ret; + int rc; ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); if (IS_ERR(ai)) @@ -1773,7 +1776,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); - ret = -ENOMEM; + rc = -ENOMEM; goto out_free_ai; } @@ -1790,10 +1793,10 @@ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); - ret = pcpu_setup_first_chunk(ai, base); + rc = pcpu_setup_first_chunk(ai, base); out_free_ai: pcpu_free_alloc_info(ai); - return ret; + return rc; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || !CONFIG_HAVE_SETUP_PER_CPU_AREA */ @@ -1813,13 +1816,12 @@ out_free_ai: * page-by-page into vmalloc area. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. */ -ssize_t __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +int __init pcpu_page_first_chunk(size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; struct pcpu_alloc_info *ai; @@ -1827,8 +1829,7 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, int unit_pages; size_t pages_size; struct page **pages; - int unit, i, j; - ssize_t ret; + int unit, i, j, rc; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); @@ -1874,10 +1875,10 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], - unit_pages); - if (ret < 0) - panic("failed to map percpu area, err=%zd\n", ret); + rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], + unit_pages); + if (rc < 0) + panic("failed to map percpu area, err=%d\n", rc); /* * FIXME: Archs with virtual cache should flush local @@ -1896,17 +1897,17 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, unit_pages, psize_str, vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size); - ret = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: while (--j >= 0) free_fn(page_address(pages[j]), PAGE_SIZE); - ret = -ENOMEM; + rc = -ENOMEM; out_free_ar: free_bootmem(__pa(pages), pages_size); pcpu_free_alloc_info(ai); - return ret; + return rc; } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ @@ -1977,20 +1978,18 @@ static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) * pcpu_lpage_remapped(). * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. 
*/ -ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) +int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; const size_t lpage_size = ai->atom_size; size_t chunk_size, map_size; unsigned int cpu; - ssize_t ret; - int i, j, unit, nr_units; + int i, j, unit, nr_units, rc; nr_units = 0; for (i = 0; i < ai->nr_groups; i++) @@ -2070,7 +2069,7 @@ ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); - ret = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); /* * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped @@ -2094,7 +2093,7 @@ ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) pcpul_nr_lpages--; - return ret; + return rc; enomem: for (i = 0; i < pcpul_nr_lpages; i++) @@ -2166,21 +2165,21 @@ EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { - ssize_t unit_size; unsigned long delta; unsigned int cpu; + int rc; /* * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - unit_size = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); - if (unit_size < 0) + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); + if (rc < 0) panic("Failed to initialized percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) - __per_cpu_offset[cpu] = delta + cpu * unit_size; + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ -- cgit v1.2.3 From ca23e405e06d5fffb005df004c72781f76062f51 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: vmalloc: implement pcpu_get_vm_areas() To directly use spread NUMA memories for percpu units, percpu allocator will be updated to allow sparsely mapping units in a chunk. As the distances between units can be very large, this makes allocating a single vmap area for each chunk undesirable. This patch implements pcpu_get_vm_areas() and pcpu_free_vm_areas() which allocate and free sparse congruent vmap areas. pcpu_get_vm_areas() takes @offsets and @sizes arrays which define the distances and sizes of the vmap areas. It scans down from the top of the vmalloc area looking for the top-most address which can accommodate all the areas. The top-down scan is to avoid interacting with regular vmallocs, which can push these congruent areas up little by little, ending up wasting address space and page tables. To speed up the top-down scan, the highest possible address hint is maintained. Although the scan is linear from the hint, given the usual large holes between memory addresses of different NUMA nodes, the scanning is highly likely to finish after finding the first hole for the last unit, which is scanned first.
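To make the search strategy concrete, here is a minimal userspace C sketch of the same "pull the base down below the blocker and recheck" loop. It is an illustration only: the names (top_down_fit, struct range) are made up, the busy set is a plain sorted array rather than the kernel's vmap_area rbtree, and the address hint, lazy purging and locking of the real implementation are all omitted. The sketch also assumes offsets[] is ascending, so the last area has the highest end address; the kernel verifies this separately.

#include <stdbool.h>
#include <stdio.h>

/* an occupied address range [start, end) */
struct range { long long start, end; };

static bool overlaps(long long s, long long e, const struct range *r)
{
	return s < r->end && r->start < e;
}

/*
 * Scan downwards from @hi for the highest @base such that every area
 * [base + offsets[i], base + offsets[i] + sizes[i]) avoids all @busy
 * ranges.  Whenever an area collides, @base is pulled down just far
 * enough that the colliding area ends at the blocker's start and the
 * whole set is rechecked.  Returns -1 if nothing fits above @lo.
 */
static long long top_down_fit(const struct range *busy, int nr_busy,
			      const long long *offsets, const long long *sizes,
			      int nr_areas, long long lo, long long hi)
{
	long long base = hi - (offsets[nr_areas - 1] + sizes[nr_areas - 1]);

	while (base >= lo) {
		bool moved = false;

		for (int i = 0; i < nr_areas && !moved; i++)
			for (int j = 0; j < nr_busy; j++)
				if (overlaps(base + offsets[i],
					     base + offsets[i] + sizes[i],
					     &busy[j])) {
					/* pull the set down below the blocker */
					base = busy[j].start - sizes[i] - offsets[i];
					moved = true;
					break;
				}
		if (!moved)
			return base;	/* every area fits */
	}
	return -1;
}

int main(void)
{
	/* two busy ranges inside a [0x1000, 0x10000) "vmalloc" window */
	struct range busy[] = { { 0x4000, 0x5000 }, { 0xe000, 0xf000 } };
	long long offsets[] = { 0x0000, 0x8000 };	/* congruent layout */
	long long sizes[]   = { 0x2000, 0x2000 };
	long long base = top_down_fit(busy, 2, offsets, sizes, 2,
				      0x1000, 0x10000);

	if (base < 0) {
		puts("no fit");
		return 1;
	}
	printf("base = %#llx\n", base);
	return 0;
}

Run against the sample data, the loop settles on base 0x2000: the second area first collides with the range at 0xe000 and pulls the base to 0x4000, where the first area then collides with the range at 0x4000 and pulls it down once more.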
Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 6 + mm/vmalloc.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 299 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a43ebec3a7b9..227c2a585e4f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,4 +115,10 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, + const size_t *sizes, int nr_vms, + size_t align, gfp_t gfp_mask); + +void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); + #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2eb461c3a46e..204b8243d8ab 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -265,6 +265,7 @@ struct vmap_area { static DEFINE_SPINLOCK(vmap_area_lock); static struct rb_root vmap_area_root = RB_ROOT; static LIST_HEAD(vmap_area_list); +static unsigned long vmap_area_pcpu_hole; static struct vmap_area *__find_vmap_area(unsigned long addr) { @@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va) RB_CLEAR_NODE(&va->rb_node); list_del_rcu(&va->list); + /* + * Track the highest possible candidate for pcpu area + * allocation. Areas outside of vmalloc area can be returned + * here too, consider only end addresses which fall inside + * vmalloc area proper. + */ + if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) + vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); + call_rcu(&va->rcu_head, rcu_free_va); } @@ -1038,6 +1048,9 @@ void __init vmalloc_init(void) va->va_end = va->va_start + tmp->size; __insert_vmap_area(va); } + + vmap_area_pcpu_hole = VMALLOC_END; + vmap_initialized = true; } @@ -1821,6 +1834,286 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +static struct vmap_area *node_to_va(struct rb_node *n) +{ + return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; +} + +/** + * pvm_find_next_prev - find the next and prev vmap_area surrounding @end + * @end: target address + * @pnext: out arg for the next vmap_area + * @pprev: out arg for the previous vmap_area + * + * Returns: %true if either or both of next and prev are found, + * %false if no vmap_area exists + * + * Find vmap_areas end addresses of which enclose @end. ie. if not + * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. + */ +static bool pvm_find_next_prev(unsigned long end, + struct vmap_area **pnext, + struct vmap_area **pprev) +{ + struct rb_node *n = vmap_area_root.rb_node; + struct vmap_area *va = NULL; + + while (n) { + va = rb_entry(n, struct vmap_area, rb_node); + if (end < va->va_end) + n = n->rb_left; + else if (end > va->va_end) + n = n->rb_right; + else + break; + } + + if (!va) + return false; + + if (va->va_end > end) { + *pnext = va; + *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + } else { + *pprev = va; + *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); + } + return true; +} + +/** + * pvm_determine_end - find the highest aligned address between two vmap_areas + * @pnext: in/out arg for the next vmap_area + * @pprev: in/out arg for the previous vmap_area + * @align: alignment + * + * Returns: determined end address + * + * Find the highest aligned address between *@pnext and *@pprev below + * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned + * down address is between the end addresses of the two vmap_areas. 
+ * + * Please note that the address returned by this function may fall + * inside *@pnext vmap_area. The caller is responsible for checking + * that. + */ +static unsigned long pvm_determine_end(struct vmap_area **pnext, + struct vmap_area **pprev, + unsigned long align) +{ + const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + unsigned long addr; + + if (*pnext) + addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); + else + addr = vmalloc_end; + + while (*pprev && (*pprev)->va_end > addr) { + *pnext = *pprev; + *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + } + + return addr; +} + +/** + * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator + * @offsets: array containing offset of each area + * @sizes: array containing size of each area + * @nr_vms: the number of areas to allocate + * @align: alignment, all entries in @offsets and @sizes must be aligned to this + * @gfp_mask: allocation mask + * + * Returns: kmalloc'd vm_struct pointer array pointing to allocated + * vm_structs on success, %NULL on failure + * + * Percpu allocator wants to use congruent vm areas so that it can + * maintain the offsets among percpu areas. This function allocates + * congruent vmalloc areas for it. These areas tend to be scattered + * pretty far, distance between two areas easily going up to + * gigabytes. To avoid interacting with regular vmallocs, these areas + * are allocated from top. + * + * Despite its complicated look, this allocator is rather simple. It + * does everything top-down and scans areas from the end looking for + * matching slot. While scanning, if any of the areas overlaps with + * existing vmap_area, the base address is pulled down to fit the + * area. Scanning is repeated till all the areas fit and then all + * necessary data structures are inserted and the result is returned. + */ +struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, + const size_t *sizes, int nr_vms, + size_t align, gfp_t gfp_mask) +{ + const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); + const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + struct vmap_area **vas, *prev, *next; + struct vm_struct **vms; + int area, area2, last_area, term_area; + unsigned long base, start, end, last_end; + bool purged = false; + + gfp_mask &= GFP_RECLAIM_MASK; + + /* verify parameters and allocate data structures */ + BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); + for (last_area = 0, area = 0; area < nr_vms; area++) { + start = offsets[area]; + end = start + sizes[area]; + + /* is everything aligned properly?
*/ + BUG_ON(!IS_ALIGNED(offsets[area], align)); + BUG_ON(!IS_ALIGNED(sizes[area], align)); + + /* detect the area with the highest address */ + if (start > offsets[last_area]) + last_area = area; + + for (area2 = 0; area2 < nr_vms; area2++) { + unsigned long start2 = offsets[area2]; + unsigned long end2 = start2 + sizes[area2]; + + if (area2 == area) + continue; + + BUG_ON(start2 >= start && start2 < end); + BUG_ON(end2 <= end && end2 > start); + } + } + last_end = offsets[last_area] + sizes[last_area]; + + if (vmalloc_end - vmalloc_start < last_end) { + WARN_ON(true); + return NULL; + } + + vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); + vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); + if (!vas || !vms) + goto err_free; + + for (area = 0; area < nr_vms; area++) { + vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); + vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); + if (!vas[area] || !vms[area]) + goto err_free; + } +retry: + spin_lock(&vmap_area_lock); + + /* start scanning - we scan from the top, begin with the last area */ + area = term_area = last_area; + start = offsets[area]; + end = start + sizes[area]; + + if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { + base = vmalloc_end - last_end; + goto found; + } + base = pvm_determine_end(&next, &prev, align) - end; + + while (true) { + BUG_ON(next && next->va_end <= base + end); + BUG_ON(prev && prev->va_end > base + end); + + /* + * base might have underflowed, add last_end before + * comparing. + */ + if (base + last_end < vmalloc_start + last_end) { + spin_unlock(&vmap_area_lock); + if (!purged) { + purge_vmap_area_lazy(); + purged = true; + goto retry; + } + goto err_free; + } + + /* + * If next overlaps, move base downwards so that it's + * right below next and then recheck. + */ + if (next && next->va_start < base + end) { + base = pvm_determine_end(&next, &prev, align) - end; + term_area = area; + continue; + } + + /* + * If prev overlaps, shift down next and prev and move + * base so that it's right below new next and then + * recheck. + */ + if (prev && prev->va_end > base + start) { + next = prev; + prev = node_to_va(rb_prev(&next->rb_node)); + base = pvm_determine_end(&next, &prev, align) - end; + term_area = area; + continue; + } + + /* + * This area fits, move on to the previous one. If + * the previous one is the terminal one, we're done. + */ + area = (area + nr_vms - 1) % nr_vms; + if (area == term_area) + break; + start = offsets[area]; + end = start + sizes[area]; + pvm_find_next_prev(base + end, &next, &prev); + } +found: + /* we've found a fitting base, insert all va's */ + for (area = 0; area < nr_vms; area++) { + struct vmap_area *va = vas[area]; + + va->va_start = base + offsets[area]; + va->va_end = va->va_start + sizes[area]; + __insert_vmap_area(va); + } + + vmap_area_pcpu_hole = base + offsets[last_area]; + + spin_unlock(&vmap_area_lock); + + /* insert all vm's */ + for (area = 0; area < nr_vms; area++) + insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, + pcpu_get_vm_areas); + + kfree(vas); + return vms; + +err_free: + for (area = 0; area < nr_vms; area++) { + if (vas) + kfree(vas[area]); + if (vms) + kfree(vms[area]); + } + kfree(vas); + kfree(vms); + return NULL; +} + +/** + * pcpu_free_vm_areas - free vmalloc areas for percpu allocator + * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() + * @nr_vms: the number of allocated areas + * + * Free vm_structs and the array allocated by pcpu_get_vm_areas(). 
+ */ +void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) +{ + int i; + + for (i = 0; i < nr_vms; i++) + free_vm_area(vms[i]); + kfree(vms); +} #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) -- cgit v1.2.3 From c8826dd538602d730ed2c18c6753f1bbfa6c4933 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: percpu: update embedding first chunk allocator to handle sparse units Now that percpu core can handle very sparse units, given that vmalloc space is large enough, embedding first chunk allocator can use any memory to build the first chunk. This patch teaches pcpu_embed_first_chunk() about distances between cpus and to use alloc/free callbacks to allocate node specific areas for each group and use them for the first chunk. This brings the benefits of embedding allocator to NUMA configurations - no extra TLB pressure with the flexibility of unified dynamic allocator and no need to restructure arch code to build memory layout suitable for percpu. With units put into atom_size aligned groups according to cpu distances, using large page for dynamic chunks is also easily possible with falling back to regular pages if large allocation fails. Embedding allocator users are converted to specify NULL cpu_distance_fn, so this patch doesn't cause any visible behavior difference. Following patches will convert them. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 +- include/linux/percpu.h | 7 ++- mm/percpu.c | 113 +++++++++++++++++++++++++++++++---------- 3 files changed, 93 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9becc5d4b518..67f6314de9f1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -234,7 +234,9 @@ static int __init setup_pcpu_embed(bool chosen) return -EINVAL; return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE); + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PAGE_SIZE, NULL, pcpu_fc_alloc, + pcpu_fc_free); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a7ec840f596c..25359932740e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -110,8 +110,11 @@ extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, void *base_addr); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK -extern int __init pcpu_embed_first_chunk(size_t reserved_size, - ssize_t dyn_size); +extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK diff --git a/mm/percpu.c b/mm/percpu.c index cc9c4c64606d..c2826d05505c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1747,15 +1747,25 @@ early_param("percpu_alloc", percpu_alloc_setup); * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @atom_size: allocation atom size + * @cpu_distance_fn: callback to determine distance between cpus, optional + * @alloc_fn: function to allocate percpu page + * @free_fn: function to free percpu page * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected.
* * If this function is used to setup the first chunk, it is allocated - * as a contiguous area using bootmem allocator and used as-is without - * being mapped into vmalloc area. This enables the first chunk to - * piggy back on the linear physical mapping which often uses larger - * page size. + * by calling @alloc_fn and used as-is without being mapped into + * vmalloc area. Allocations are always whole multiples of @atom_size + * aligned to @atom_size. + * + * This enables the first chunk to piggy back on the linear physical + * mapping which often uses larger page size. Please note that this + * can result in very sparse cpu->unit mapping on NUMA machines thus + * requiring large vmalloc address space. Don't use this allocator if + * vmalloc space is not orders of magnitude larger than distances + * between node memory addresses (ie. 32bit NUMA machines). * * When @dyn_size is positive, dynamic area might be larger than * specified to fill page alignment. When @dyn_size is auto, @@ -1763,53 +1773,88 @@ early_param("percpu_alloc", percpu_alloc_setup); * and reserved areas. * * If the needed size is smaller than the minimum or specified unit - * size, the leftover is returned to the bootmem allocator. + * size, the leftover is returned using @free_fn. * * RETURNS: * 0 on success, -errno on failure. */ -int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) +int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn) { + void *base = (void *)ULONG_MAX; + void **areas = NULL; struct pcpu_alloc_info *ai; - size_t size_sum, chunk_size; - void *base; - int unit; - int rc; + size_t size_sum, areas_size; + int group, i, rc; - ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); + ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, + cpu_distance_fn); if (IS_ERR(ai)) return PTR_ERR(ai); - BUG_ON(ai->nr_groups != 1); - BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; - chunk_size = ai->unit_size * num_possible_cpus(); + areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!base) { - pr_warning("PERCPU: failed to allocate %zu bytes for " - "embedding\n", chunk_size); + areas = alloc_bootmem_nopanic(areas_size); + if (!areas) { rc = -ENOMEM; - goto out_free_ai; + goto out_free; } - /* return the leftover and copy */ - for (unit = 0; unit < num_possible_cpus(); unit++) { - void *ptr = base + unit * ai->unit_size; + /* allocate, copy and determine base address */ + for (group = 0; group < ai->nr_groups; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + unsigned int cpu = NR_CPUS; + void *ptr; + + for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++) + cpu = gi->cpu_map[i]; + BUG_ON(cpu == NR_CPUS); + + /* allocate space for the whole group */ + ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size); + if (!ptr) { + rc = -ENOMEM; + goto out_free_areas; + } + areas[group] = ptr; - free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); - memcpy(ptr, __per_cpu_load, ai->static_size); + base = min(ptr, base); + + for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) { + if (gi->cpu_map[i] == NR_CPUS) { + /* unused unit, free whole */ + free_fn(ptr, ai->unit_size); + continue; + } + /* copy and return the unused part */ + memcpy(ptr, 
__per_cpu_load, ai->static_size); + free_fn(ptr + size_sum, ai->unit_size - size_sum); + } } - /* we're ready, commit */ + /* base address is now known, determine group base offsets */ + for (group = 0; group < ai->nr_groups; group++) + ai->groups[group].base_offset = areas[group] - base; + pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); rc = pcpu_setup_first_chunk(ai, base); -out_free_ai: + goto out_free; + +out_free_areas: + for (group = 0; group < ai->nr_groups; group++) + free_fn(areas[group], + ai->groups[group].nr_units * ai->unit_size); +out_free: pcpu_free_alloc_info(ai); + if (areas) + free_bootmem(__pa(areas), areas_size); return rc; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || @@ -2177,6 +2222,17 @@ void *pcpu_lpage_remapped(void *kaddr) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); +} + +static void __init pcpu_dfl_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + void __init setup_per_cpu_areas(void) { unsigned long delta; @@ -2188,7 +2244,8 @@ void __init setup_per_cpu_areas(void) * what the legacy allocator did. */ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, + pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From e933a73f48e3b2d40cfa56d81e2646f194b5a66a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:53 +0900 Subject: percpu: kill lpage first chunk allocator With x86 converted to embedding allocator, lpage doesn't have any user left. Kill it along with cpa handling code. Signed-off-by: Tejun Heo Cc: Jan Beulich --- Documentation/kernel-parameters.txt | 10 +- arch/x86/mm/pageattr.c | 20 +-- include/linux/percpu.h | 16 --- mm/percpu.c | 241 ------------------------------------ 4 files changed, 6 insertions(+), 281 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index dee9ce2e6cfa..e710093e3d32 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1920,11 +1920,11 @@ and is between 256 and 4096 characters. It is defined in the file See arch/parisc/kernel/pdc_chassis.c percpu_alloc= Select which percpu first chunk allocator to use. - Currently supported values are "embed", "page" and - "lpage". Archs may support subset or none of the - selections. See comments in mm/percpu.c for details - on each allocator. This parameter is primarily for - debugging and performance comparison. + Currently supported values are "embed" and "page". + Archs may support subset or none of the selections. + See comments in mm/percpu.c for details on each + allocator. This parameter is primarily for debugging + and performance comparison. pf. [PARIDE] See Documentation/blockdev/paride.txt. 
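As a concrete illustration of the pcpu_embed_first_chunk() interface introduced above: once an arch passes a real cpu_distance_fn, the alloc/free callbacks can keep each group's memory on its own node. The sketch below is an assumption modeled on the x86 defaults shown earlier, not code from these patches; pcpu_numa_alloc, pcpu_numa_free and the choice of PMD_SIZE atoms are illustrative.

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	/* cpus on the same node are near, everything else is remote */
	if (early_cpu_to_node(from) == early_cpu_to_node(to))
		return LOCAL_DISTANCE;
	return REMOTE_DISTANCE;
}

static void * __init pcpu_numa_alloc(unsigned int cpu, size_t size,
				     size_t align)
{
	/* back each group with bootmem from the cpu's own node */
	return __alloc_bootmem_node_nopanic(NODE_DATA(early_cpu_to_node(cpu)),
					    size, align, __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_numa_free(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

	/* e.g. from setup_per_cpu_areas(), with 2MB atoms for large pages */
	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
				    pcpu_cpu_distance,
				    pcpu_numa_alloc, pcpu_numa_free);

With a non-NULL cpu_distance_fn, pcpu_build_alloc_info() packs nearby cpus into atom_size aligned groups, which is what makes the large-page fallback mentioned in the changelog possible.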
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dce282f65700..f53cfc7f963d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr, remapped; + unsigned long vaddr; int ret; if (cpa->pfn >= max_pfn_mapped) @@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa) } #endif - /* - * If the PMD page was partially used for per-cpu remapping, - * the recycled area needs to be split and modified. Because - * the area is always proper subset of a PMD page - * cpa->numpages is guaranteed to be 1 for these areas, so - * there's no need to loop over and check for further remaps. - */ - remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); - if (remapped) { - WARN_ON(cpa->numpages > 1); - alias_cpa = *cpa; - alias_cpa.vaddr = &remapped; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - return 0; } diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 25359932740e..878836ca999c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -82,7 +82,6 @@ enum pcpu_fc { PCPU_FC_AUTO, PCPU_FC_EMBED, PCPU_FC_PAGE, - PCPU_FC_LPAGE, PCPU_FC_NR, }; @@ -95,7 +94,6 @@ typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); -typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, int nr_units); @@ -124,20 +122,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn); - -extern void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index c2826d05505c..77933928107d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1713,7 +1713,6 @@ const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", [PCPU_FC_PAGE] = "page", - [PCPU_FC_LPAGE] = "lpage", }; enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; @@ -1729,10 +1728,6 @@ static int __init percpu_alloc_setup(char *str) #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK else if (!strcmp(str, "page")) pcpu_chosen_fc = PCPU_FC_PAGE; -#endif -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK - else if (!strcmp(str, "lpage")) - pcpu_chosen_fc = PCPU_FC_LPAGE; #endif else pr_warning("PERCPU: unknown allocator %s specified\n", str); @@ -1970,242 +1965,6 @@ out_free_ar: } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -struct pcpul_ent { - void *ptr; - void *map_addr; -}; - -static size_t pcpul_size; -static size_t pcpul_lpage_size; -static int pcpul_nr_lpages; -static struct pcpul_ent *pcpul_map; - -static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, - unsigned int *cpup) -{ - int group, cunit; - - for (group = 0, cunit = 0; group < ai->nr_groups; group++) { - const struct pcpu_group_info *gi = &ai->groups[group]; - - if (unit < cunit + gi->nr_units) { - if (cpup) - *cpup = gi->cpu_map[unit - cunit]; - return true; - } - cunit += gi->nr_units; - } - - return false; -} - -static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) -{ - int group, unit, i; - - for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { - const struct pcpu_group_info *gi = &ai->groups[group]; - - for (i = 0; i < gi->nr_units; i++) - if (gi->cpu_map[i] == cpu) - return unit + i; - } - BUG(); -} - -/** - * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @ai: pcpu_alloc_info - * @alloc_fn: function to allocate percpu lpage, always called with lpage_size - * @free_fn: function to free percpu memory, @size <= lpage_size - * @map_fn: function to map percpu lpage, always called with lpage_size - * - * This allocator uses large page to build and map the first chunk. - * Unlike other helpers, the caller should provide fully initialized - * @ai. This can be done using pcpu_build_alloc_info(). This two - * stage initialization is to allow arch code to evaluate the - * parameters before committing to it. - * - * Large pages are allocated as directed by @unit_map and other - * parameters and mapped to vmalloc space. Unused holes are returned - * to the page allocator. Note that these holes end up being actively - * mapped twice - once to the physical mapping and to the vmalloc area - * for the first percpu chunk. Depending on architecture, this might - * cause problem when changing page attributes of the returned area. - * These double mapped areas can be detected using - * pcpu_lpage_remapped(). - * - * RETURNS: - * 0 on success, -errno on failure. 
- */ -int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) -{ - static struct vm_struct vm; - const size_t lpage_size = ai->atom_size; - size_t chunk_size, map_size; - unsigned int cpu; - int i, j, unit, nr_units, rc; - - nr_units = 0; - for (i = 0; i < ai->nr_groups; i++) - nr_units += ai->groups[i].nr_units; - - chunk_size = ai->unit_size * nr_units; - BUG_ON(chunk_size % lpage_size); - - pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; - pcpul_lpage_size = lpage_size; - pcpul_nr_lpages = chunk_size / lpage_size; - - /* allocate pointer array and alloc large pages */ - map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]); - pcpul_map = alloc_bootmem(map_size); - - /* allocate all pages */ - for (i = 0; i < pcpul_nr_lpages; i++) { - size_t offset = i * lpage_size; - int first_unit = offset / ai->unit_size; - int last_unit = (offset + lpage_size - 1) / ai->unit_size; - void *ptr; - - /* find out which cpu is mapped to this unit */ - for (unit = first_unit; unit <= last_unit; unit++) - if (pcpul_unit_to_cpu(unit, ai, &cpu)) - goto found; - continue; - found: - ptr = alloc_fn(cpu, lpage_size, lpage_size); - if (!ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - pcpul_map[i].ptr = ptr; - } - - /* return unused holes */ - for (unit = 0; unit < nr_units; unit++) { - size_t start = unit * ai->unit_size; - size_t end = start + ai->unit_size; - size_t off, next; - - /* don't free used part of occupied unit */ - if (pcpul_unit_to_cpu(unit, ai, NULL)) - start += pcpul_size; - - /* unit can span more than one page, punch the holes */ - for (off = start; off < end; off = next) { - void *ptr = pcpul_map[off / lpage_size].ptr; - next = min(roundup(off + 1, lpage_size), end); - if (ptr) - free_fn(ptr + off % lpage_size, next - off); - } - } - - /* allocate address, map and copy */ - vm.flags = VM_ALLOC; - vm.size = chunk_size; - vm_area_register_early(&vm, ai->unit_size); - - for (i = 0; i < pcpul_nr_lpages; i++) { - if (!pcpul_map[i].ptr) - continue; - pcpul_map[i].map_addr = vm.addr + i * lpage_size; - map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr); - } - - for_each_possible_cpu(cpu) - memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, - __per_cpu_load, ai->static_size); - - /* we're ready, commit */ - pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", - vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, - ai->unit_size); - - rc = pcpu_setup_first_chunk(ai, vm.addr); - - /* - * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped - * lpages are pushed to the end and trimmed. - */ - for (i = 0; i < pcpul_nr_lpages - 1; i++) - for (j = i + 1; j < pcpul_nr_lpages; j++) { - struct pcpul_ent tmp; - - if (!pcpul_map[j].ptr) - continue; - if (pcpul_map[i].ptr && - pcpul_map[i].ptr < pcpul_map[j].ptr) - continue; - - tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) - pcpul_nr_lpages--; - - return rc; - -enomem: - for (i = 0; i < pcpul_nr_lpages; i++) - if (pcpul_map[i].ptr) - free_fn(pcpul_map[i].ptr, lpage_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. 
This is - * used by pageattr to detect VM aliases and break up the pcpu large - * page mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used large - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - unsigned long lpage_mask = pcpul_lpage_size - 1; - void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask); - unsigned long offset = (unsigned long)kaddr & lpage_mask; - int left = 0, right = pcpul_nr_lpages - 1; - int pos; - - /* pcpul in use at all? */ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < lpage_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > lpage_addr) - right = pos - 1; - else - return pcpul_map[pos].map_addr + offset; - } - - return NULL; -} -#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */ - /* * Generic percpu area setup. * -- cgit v1.2.3 From 31089c13bcb18d2cd2a3ddfbe3a28666346f237e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 14 Aug 2009 15:47:18 +0200 Subject: timekeeping: Introduce timekeeping_leap_insert Move the adjustment of xtime, wall_to_monotonic and the update of the vsyscall variables to the timekeeping code. Signed-off-by: John Stultz Signed-off-by: Martin Schwidefsky LKML-Reference: <20090814134807.609730216@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 + kernel/time/ntp.c | 7 ++----- kernel/time/timekeeping.c | 7 +++++++ 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index ea16c1a01d51..e7c844558884 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -147,6 +147,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); +extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7fc64375ff43..4800f933910e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) case TIME_OK: break; case TIME_INS: - xtime.tv_sec--; - wall_to_monotonic.tv_sec++; + timekeeping_leap_insert(-1); time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); @@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) res = HRTIMER_RESTART; break; case TIME_DEL: - xtime.tv_sec++; + timekeeping_leap_insert(1); time_tai--; - wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); @@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) time_state = TIME_OK; break; } - update_vsyscall(&xtime, clock); write_sequnlock(&xtime_lock); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 02c0b2c9c674..b8b70fb545fc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -58,6 +58,13 @@ void update_xtime_cache(u64 nsec) struct clocksource *clock; +/* must hold xtime_lock */ +void timekeeping_leap_insert(int leapsecond) +{ + xtime.tv_sec += leapsecond; + wall_to_monotonic.tv_sec -= leapsecond; + update_vsyscall(&xtime, clock); +} 
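Note that the new helper relies on its caller for locking: as the "must hold xtime_lock" comment says, ntp_leap_second() already runs under xtime_lock when it calls timekeeping_leap_insert(). A hypothetical caller outside ntp.c would have to take the lock itself, roughly:

	unsigned long flags;

	write_seqlock_irqsave(&xtime_lock, flags);
	timekeeping_leap_insert(-1);	/* TIME_INS: step wall clock back 1s */
	write_sequnlock_irqrestore(&xtime_lock, flags);

The sign convention matches the ntp.c hunk above: -1 inserts a leap second (23:59:60), +1 deletes one.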
#ifdef CONFIG_GENERIC_TIME /** -- cgit v1.2.3 From a0f7d48bfb95a4c5172a2756dbc4b82afc8e9ae4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:19 +0200 Subject: timekeeping: Remove clocksource inline functions The three inline functions clocksource_read, clocksource_enable and clocksource_disable are simple wrappers of an indirect call plus the copy from and to the mult_orig value. The functions are exclusively used by the timekeeping code which has intimate knowledge of the clocksource anyway. Therefore remove the inline functions. No functional change. Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134807.903108946@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 58 --------------------------------------------- kernel/time/timekeeping.c | 41 ++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1219be4fb42e..a1ef46f61c81 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -267,64 +267,6 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) return (u32)tmp; } -/** - * clocksource_read: - Access the clocksource's current cycle value - * @cs: pointer to clocksource being read - * - * Uses the clocksource to return the current cycle_t value - */ -static inline cycle_t clocksource_read(struct clocksource *cs) -{ - return cs->read(cs); -} - -/** - * clocksource_enable: - enable clocksource - * @cs: pointer to clocksource - * - * Enables the specified clocksource. The clocksource callback - * function should start up the hardware and setup mult and field - * members of struct clocksource to reflect hardware capabilities. - */ -static inline int clocksource_enable(struct clocksource *cs) -{ - int ret = 0; - - if (cs->enable) - ret = cs->enable(cs); - - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - cs->mult_orig = cs->mult; - - return ret; -} - -/** - * clocksource_disable: - disable clocksource - * @cs: pointer to clocksource - * - * Disables the specified clocksource. The clocksource callback - * function should power down the now unused hardware block to - * save power. - */ -static inline void clocksource_disable(struct clocksource *cs) -{ - /* - * Save mult_orig in mult so clocksource_enable() can - * restore the value regardless if ->enable() updates - * the value of mult or not. 
- */ - cs->mult = cs->mult_orig; - - if (cs->disable) - cs->disable(cs); -} - /** * cyc2ns - converts clocksource cycles to nanoseconds * @cs: Pointer to clocksource diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b8b70fb545fc..016a2591d719 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -79,7 +79,7 @@ static void clocksource_forward_now(void) cycle_t cycle_now, cycle_delta; s64 nsec; - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; @@ -114,7 +114,7 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -146,7 +146,7 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -186,7 +186,7 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -274,16 +274,29 @@ static void change_clocksource(void) clocksource_forward_now(); - if (clocksource_enable(new)) + if (new->enable && !new->enable(new)) return; + /* + * The frequency may have changed while the clocksource + * was disabled. If so the code in ->enable() must update + * the mult value to reflect the new frequency. Make sure + * mult_orig follows this change. + */ + new->mult_orig = new->mult; new->raw_time = clock->raw_time; old = clock; clock = new; - clocksource_disable(old); + /* + * Save mult_orig in mult so that the value can be restored + * regardless if ->enable() updates the value of mult or not. 
+ */ + old->mult = old->mult_orig; + if (old->disable) + old->disable(old); clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; clock->xtime_nsec = 0; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -373,7 +386,7 @@ void getrawmonotonic(struct timespec *ts) seq = read_seqbegin(&xtime_lock); /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -435,9 +448,12 @@ void __init timekeeping_init(void) ntp_init(); clock = clocksource_get_next(); - clocksource_enable(clock); + if (clock->enable) + clock->enable(clock); + /* set mult_orig on enable */ + clock->mult_orig = clock->mult; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; @@ -477,8 +493,7 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -630,7 +645,7 @@ void update_wall_time(void) return; #ifdef CONFIG_GENERIC_TIME - offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; + offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else offset = clock->cycle_interval; #endif -- cgit v1.2.3 From f1b82746c1e93daf24e1ab9bfbd39bcdb2e7018b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:21 +0200 Subject: clocksource: Cleanup clocksource selection If a non high-resolution clocksource is first set as override clock and then registered it becomes active even if the system is in one-shot mode. Move the override check from sysfs_override_clocksource to the clocksource selection. That fixes the bug and simplifies the code. The check in clocksource_register for double registration of the same clocksource is removed without replacement. To find the initial clocksource a new weak function in jiffies.c is defined that returns the jiffies clocksource. The architecture code can then override the weak function with a more suitable clocksource, e.g. the TOD clock on s390. 
[ tglx: Folded in a fix from John Stultz ] Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.388024160@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 4 ++ include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 134 +++++++++++++++++--------------------------- kernel/time/jiffies.c | 6 +- kernel/time/timekeeping.c | 4 +- 5 files changed, 64 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c47c81..afefe514df0f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -205,6 +205,10 @@ static struct clocksource clocksource_tod = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +struct clocksource * __init clocksource_default_clock(void) +{ + return &clocksource_tod; +} void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a1ef46f61c81..f263b3abf46e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -322,6 +323,7 @@ extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); +extern struct clocksource * __init __weak clocksource_default_clock(void); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb811251..e91662e87cde 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -21,7 +21,6 @@ * * TODO WishList: * o Allow clocksource drivers to be unregistered - * o get rid of clocksource_jiffies extern */ #include @@ -107,12 +106,9 @@ u64 timecounter_cyc2time(struct timecounter *tc, } EXPORT_SYMBOL(timecounter_cyc2time); -/* XXX - Would like a better way for initializing curr_clocksource */ -extern struct clocksource clocksource_jiffies; - /*[Clocksource internal variables]--------- * curr_clocksource: - * currently selected clocksource. Initialized to clocksource_jiffies. + * currently selected clocksource. * next_clocksource: * pending next selected clocksource. * clocksource_list: @@ -123,9 +119,8 @@ extern struct clocksource clocksource_jiffies; * override_name: * Name of the user-specified clocksource. */ -static struct clocksource *curr_clocksource = &clocksource_jiffies; +static struct clocksource *curr_clocksource; static struct clocksource *next_clocksource; -static struct clocksource *clocksource_override; static LIST_HEAD(clocksource_list); static DEFINE_SPINLOCK(clocksource_lock); static char override_name[32]; @@ -320,6 +315,7 @@ void clocksource_touch_watchdog(void) clocksource_resume_watchdog(); } +#ifdef CONFIG_GENERIC_TIME /** * clocksource_get_next - Returns the selected clocksource * @@ -339,56 +335,65 @@ struct clocksource *clocksource_get_next(void) } /** - * select_clocksource - Selects the best registered clocksource. + * clocksource_select - Select the best clocksource available * * Private function. Must hold clocksource_lock when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. 
*/ -static struct clocksource *select_clocksource(void) +static void clocksource_select(void) { - struct clocksource *next; + struct clocksource *best, *cs; if (list_empty(&clocksource_list)) - return NULL; + return; + /* First clocksource on the list has the best rating. */ + best = list_first_entry(&clocksource_list, struct clocksource, list); + /* Check for the override clocksource. */ + list_for_each_entry(cs, &clocksource_list, list) { + if (strcmp(cs->name, override_name) != 0) + continue; + /* + * Check to make sure we don't switch to a non-highres + * capable clocksource if the tick code is in oneshot + * mode (highres or nohz) + */ + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && + tick_oneshot_mode_active()) { + /* Override clocksource cannot be used. */ + printk(KERN_WARNING "Override clocksource %s is not " + "HRT compatible. Cannot switch while in " + "HRT/NOHZ mode\n", cs->name); + override_name[0] = 0; + } else + /* Override clocksource can be used. */ + best = cs; + break; + } + if (curr_clocksource != best) + next_clocksource = best; +} - if (clocksource_override) - next = clocksource_override; - else - next = list_entry(clocksource_list.next, struct clocksource, - list); +#else /* CONFIG_GENERIC_TIME */ - if (next == curr_clocksource) - return NULL; +static void clocksource_select(void) { } - return next; -} +#endif /* * Enqueue the clocksource sorted by rating */ -static int clocksource_enqueue(struct clocksource *c) +static void clocksource_enqueue(struct clocksource *cs) { - struct list_head *tmp, *entry = &clocksource_list; - - list_for_each(tmp, &clocksource_list) { - struct clocksource *cs; + struct list_head *entry = &clocksource_list; + struct clocksource *tmp; - cs = list_entry(tmp, struct clocksource, list); - if (cs == c) - return -EBUSY; + list_for_each_entry(tmp, &clocksource_list, list) /* Keep track of the place, where to insert */ - if (cs->rating >= c->rating) - entry = tmp; - } - list_add(&c->list, entry); - - if (strlen(c->name) == strlen(override_name) && - !strcmp(c->name, override_name)) - clocksource_override = c; - - return 0; + if (tmp->rating >= cs->rating) + entry = &tmp->list; + list_add(&cs->list, entry); } /** @@ -397,19 +402,16 @@ static int clocksource_enqueue(struct clocksource *c) * * Returns -EBUSY if registration fails, zero otherwise. 
*/ -int clocksource_register(struct clocksource *c) +int clocksource_register(struct clocksource *cs) { unsigned long flags; - int ret; spin_lock_irqsave(&clocksource_lock, flags); - ret = clocksource_enqueue(c); - if (!ret) - next_clocksource = select_clocksource(); + clocksource_enqueue(cs); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); - if (!ret) - clocksource_check_watchdog(c); - return ret; + clocksource_check_watchdog(cs); + return 0; } EXPORT_SYMBOL(clocksource_register); @@ -425,7 +427,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -438,9 +440,7 @@ void clocksource_unregister(struct clocksource *cs) spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); - if (clocksource_override == cs) - clocksource_override = NULL; - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -478,9 +478,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t count) { - struct clocksource *ovr = NULL; size_t ret = count; - int len; /* strings from sysfs write are not 0 terminated! */ if (count >= sizeof(override_name)) @@ -495,37 +493,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; - - len = strlen(override_name); - if (len) { - struct clocksource *cs; - - ovr = clocksource_override; - /* try to select it: */ - list_for_each_entry(cs, &clocksource_list, list) { - if (strlen(cs->name) == len && - !strcmp(cs->name, override_name)) - ovr = cs; - } - } - - /* - * Check to make sure we don't switch to a non-highres capable - * clocksource if the tick code is in oneshot mode (highres or nohz) - */ - if (tick_oneshot_mode_active() && ovr && - !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { - printk(KERN_WARNING "%s clocksource is not HRT compatible. 
" - "Cannot switch while in HRT/NOHZ mode\n", ovr->name); - ovr = NULL; - override_name[0] = 0; - } - - /* Reselect, when the override name has changed */ - if (ovr != clocksource_override) { - clocksource_override = ovr; - next_clocksource = select_clocksource(); - } + clocksource_select(); spin_unlock_irq(&clocksource_lock); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index c3f6c30816e3..5404a8456909 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = { .read = jiffies_read, .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ - .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT, .shift = JIFFIES_SHIFT, }; @@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void) } core_initcall(init_jiffies_clocksource); + +struct clocksource * __init __weak clocksource_default_clock(void) +{ + return &clocksource_jiffies; +} diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b5673016089f..325a9b63265a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -269,7 +269,7 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (clock == new) + if (!new || clock == new) return; clocksource_forward_now(); @@ -446,7 +446,7 @@ void __init timekeeping_init(void) ntp_init(); - clock = clocksource_get_next(); + clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); /* set mult_orig on enable */ -- cgit v1.2.3 From c55c87c892c1875deace0c8fc28787335277fdf2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:25 +0200 Subject: clocksource: Move watchdog downgrade to a work queue thread Move the downgrade of an unstable clocksource from the timer interrupt context into the process context of a work queue thread. This is needed to be able to do the clocksource switch with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.354926067@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 56 +++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f263b3abf46e..19ad43af62d0 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -213,6 +213,7 @@ extern struct clocksource *clock; /* current clocksource */ #define CLOCK_SOURCE_WATCHDOG 0x10 #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 +#define CLOCK_SOURCE_UNSTABLE 0x40 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? 
((1ULL<<(bits))-1) : -1) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 56aaa749645d..f1508019bfb4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -143,10 +143,13 @@ fs_initcall(clocksource_done_booting); static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; static struct timer_list watchdog_timer; +static struct work_struct watchdog_work; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; static int watchdog_running; +static void clocksource_watchdog_work(struct work_struct *work); + /* * Interval: 0.5sec Threshold: 0.0625s */ @@ -158,15 +161,16 @@ static void clocksource_unstable(struct clocksource *cs, int64_t delta) printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); - clocksource_change_rating(cs, 0); - list_del(&cs->wd_list); + cs->flags |= CLOCK_SOURCE_UNSTABLE; + schedule_work(&watchdog_work); } static void clocksource_watchdog(unsigned long data) { - struct clocksource *cs, *tmp; + struct clocksource *cs; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; + int next_cpu; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -176,7 +180,12 @@ static void clocksource_watchdog(unsigned long data) wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; - list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { + list_for_each_entry(cs, &watchdog_list, wd_list) { + + /* Clocksource already marked unstable? */ + if (cs->flags & CLOCK_SOURCE_UNSTABLE) + continue; + csnow = cs->read(cs); /* Clocksource initialized ? */ @@ -207,19 +216,15 @@ static void clocksource_watchdog(unsigned long data) } } - if (!list_empty(&watchdog_list)) { - /* - * Cycle through CPUs to check if the CPUs stay - * synchronized to each other. - */ - int next_cpu = cpumask_next(raw_smp_processor_id(), - cpu_online_mask); - - if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); - } + /* + * Cycle through CPUs to check if the CPUs stay synchronized + * to each other. + */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); out: spin_unlock(&watchdog_lock); } @@ -228,6 +233,7 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; + INIT_WORK(&watchdog_work, clocksource_watchdog_work); init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; watchdog_last = watchdog->read(watchdog); @@ -313,6 +319,22 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } +static void clocksource_watchdog_work(struct work_struct *work) +{ + struct clocksource *cs, *tmp; + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) + if (cs->flags & CLOCK_SOURCE_UNSTABLE) { + list_del_init(&cs->wd_list); + clocksource_change_rating(cs, 0); + } + /* Check if the watchdog timer needs to be stopped. 
*/ + clocksource_stop_watchdog(); + spin_unlock(&watchdog_lock); +} + #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ static void clocksource_enqueue_watchdog(struct clocksource *cs) -- cgit v1.2.3 From 155ec60226ae0ae2aadaa57c951a58a359331030 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:26 +0200 Subject: timekeeping: Introduce struct timekeeper Add struct timekeeper to keep the internal values timekeeping.c needs in regard to the currently selected clock source. This moves the timekeeping intervals, xtime_nsec and the ntp error value from struct clocksource to struct timekeeper. The raw_time is removed from the clocksource as well. It gets treated like xtime as a global variable. Eventually xtime raw_time should be moved to struct timekeeper. [ tglx: minor cleanup ] Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.613209842@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 1 - include/linux/clocksource.h | 54 +--------- kernel/time/clocksource.c | 6 +- kernel/time/timekeeping.c | 235 +++++++++++++++++++++++++++++--------------- 4 files changed, 164 insertions(+), 132 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index afefe514df0f..e76c2e7a8b9a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -280,7 +280,6 @@ void __init time_init(void) now = get_clock(); tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); clocksource_tod.cycle_last = now; - clocksource_tod.raw_time = xtime; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 19ad43af62d0..e12e3095e2fb 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -155,8 +155,6 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @flags: flags describing special properties * @vread: vsyscall based read * @resume: resume function for the clocksource, if necessary - * @cycle_interval: Used internally by timekeeping core, please ignore. - * @xtime_interval: Used internally by timekeeping core, please ignore. */ struct clocksource { /* @@ -182,19 +180,12 @@ struct clocksource { #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) #endif - /* timekeeping specific data, ignore */ - cycle_t cycle_interval; - u64 xtime_interval; - u32 raw_interval; /* * Second part is written at each timer interrupt * Keep it in a different cache line to dirty no * more than one cache line. */ cycle_t cycle_last ____cacheline_aligned_in_smp; - u64 xtime_nsec; - s64 error; - struct timespec raw_time; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ @@ -203,8 +194,6 @@ struct clocksource { #endif }; -extern struct clocksource *clock; /* current clocksource */ - /* * Clock source flags bits:: */ @@ -270,50 +259,15 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) } /** - * cyc2ns - converts clocksource cycles to nanoseconds - * @cs: Pointer to clocksource - * @cycles: Cycles + * clocksource_cyc2ns - converts clocksource cycles to nanoseconds * - * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds. + * Converts cycles to nanoseconds, using the given mult and shift. 
* * XXX - This could use some mult_lxl_ll() asm optimization */ -static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) +static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) { - u64 ret = (u64)cycles; - ret = (ret * cs->mult) >> cs->shift; - return ret; -} - -/** - * clocksource_calculate_interval - Calculates a clocksource interval struct - * - * @c: Pointer to clocksource. - * @length_nsec: Desired interval length in nanoseconds. - * - * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment - * pair and interval request. - * - * Unless you're the timekeeping code, you should not be using this! - */ -static inline void clocksource_calculate_interval(struct clocksource *c, - unsigned long length_nsec) -{ - u64 tmp; - - /* Do the ns -> cycle conversion first, using original mult */ - tmp = length_nsec; - tmp <<= c->shift; - tmp += c->mult_orig/2; - do_div(tmp, c->mult_orig); - - c->cycle_interval = (cycle_t)tmp; - if (c->cycle_interval == 0) - c->cycle_interval = 1; - - /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ - c->xtime_interval = (u64)c->cycle_interval * c->mult; - c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; + return ((u64) cycles * mult) >> shift; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f1508019bfb4..f18c9a6bdcf4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -177,7 +177,8 @@ static void clocksource_watchdog(unsigned long data) goto out; wdnow = watchdog->read(watchdog); - wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); + wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, + watchdog->mult, watchdog->shift); watchdog_last = wdnow; list_for_each_entry(cs, &watchdog_list, wd_list) { @@ -196,7 +197,8 @@ static void clocksource_watchdog(unsigned long data) } /* Check the deviation from the watchdog clocksource. */ - cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); + cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + cs->mask, cs->mult, cs->shift); cs->wd_last = csnow; if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 325a9b63265a..7af45cbf6b13 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -19,6 +19,65 @@ #include #include +/* Structure holding internal timekeeping values. */ +struct timekeeper { + /* Current clocksource used for timekeeping. */ + struct clocksource *clock; + + /* Number of clock cycles in one NTP interval. */ + cycle_t cycle_interval; + /* Number of clock shifted nano seconds in one NTP interval. */ + u64 xtime_interval; + /* Raw nano seconds accumulated per NTP interval. */ + u32 raw_interval; + + /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ + u64 xtime_nsec; + /* Difference between accumulated time and NTP time in ntp + * shifted nano seconds. */ + s64 ntp_error; +}; + +struct timekeeper timekeeper; + +/** + * timekeeper_setup_internals - Set up internals to use clocksource clock. + * + * @clock: Pointer to clocksource. + * + * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment + * pair and interval request. + * + * Unless you're the timekeeping code, you should not be using this! 
+ */ +static void timekeeper_setup_internals(struct clocksource *clock) +{ + cycle_t interval; + u64 tmp; + + timekeeper.clock = clock; + clock->cycle_last = clock->read(clock); + + /* Do the ns -> cycle conversion first, using original mult */ + tmp = NTP_INTERVAL_LENGTH; + tmp <<= clock->shift; + tmp += clock->mult_orig/2; + do_div(tmp, clock->mult_orig); + if (tmp == 0) + tmp = 1; + + interval = (cycle_t) tmp; + timekeeper.cycle_interval = interval; + + /* Go back from cycles -> shifted ns */ + timekeeper.xtime_interval = (u64) interval * clock->mult; + timekeeper.raw_interval = + ((u64) interval * clock->mult_orig) >> clock->shift; + + timekeeper.xtime_nsec = 0; + + timekeeper.ntp_error = 0; +} /* * This read-write spinlock protects us from races in SMP while @@ -46,6 +105,11 @@ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static unsigned long total_sleep_time; /* seconds */ +/* + * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. + */ +struct timespec raw_time; + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -56,42 +120,42 @@ void update_xtime_cache(u64 nsec) timespec_add_ns(&xtime_cache, nsec); } -struct clocksource *clock; - /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { xtime.tv_sec += leapsecond; wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } #ifdef CONFIG_GENERIC_TIME /** - * clocksource_forward_now - update clock to the current time + * timekeeping_forward_now - update clock to the current time * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. 
*/ -static void clocksource_forward_now(void) +static void timekeeping_forward_now(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; s64 nsec; + clock = timekeeper.clock; cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = cyc2ns(clock, cycle_delta); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; - clock->raw_time.tv_nsec += nsec; + nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + timespec_add_ns(&raw_time, nsec); } /** @@ -103,6 +167,7 @@ static void clocksource_forward_now(void) void getnstimeofday(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned long seq; s64 nsecs; @@ -114,13 +179,15 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -135,6 +202,7 @@ EXPORT_SYMBOL(getnstimeofday); ktime_t ktime_get(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned int seq; s64 secs, nsecs; @@ -146,13 +214,15 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += cyc2ns(clock, cycle_delta); + nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -174,6 +244,7 @@ EXPORT_SYMBOL_GPL(ktime_get); void ktime_get_ts(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; struct timespec tomono; unsigned int seq; s64 nsecs; @@ -186,13 +257,15 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); @@ -233,7 +306,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; @@ -243,10 +316,10 @@ int do_settimeofday(struct timespec *tv) update_xtime_cache(0); - clock->error = 0; + timekeeper.ntp_error = 0; ntp_clear(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -269,10 +342,10 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (!new || clock == new) + if (!new || timekeeper.clock == new) return; - clocksource_forward_now(); + timekeeping_forward_now(); if (new->enable && 
!new->enable(new)) return; @@ -284,9 +357,9 @@ static void change_clocksource(void) */ new->mult_orig = new->mult; - new->raw_time = clock->raw_time; - old = clock; - clock = new; + old = timekeeper.clock; + timekeeper_setup_internals(new); + /* * Save mult_orig in mult so that the value can be restored * regardless if ->enable() updates the value of mult or not. @@ -295,22 +368,10 @@ static void change_clocksource(void) if (old->disable) old->disable(old); - clock->cycle_last = clock->read(clock); - clock->error = 0; - clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - tick_clock_notify(); - - /* - * We're holding xtime lock and waking up klogd would deadlock - * us on enqueue. So no printing! - printk(KERN_INFO "Time: %s clocksource has been installed.\n", - clock->name); - */ } #else /* GENERIC_TIME */ -static inline void clocksource_forward_now(void) { } +static inline void timekeeping_forward_now(void) { } static inline void change_clocksource(void) { } /** @@ -380,20 +441,23 @@ void getrawmonotonic(struct timespec *ts) unsigned long seq; s64 nsecs; cycle_t cycle_now, cycle_delta; + struct clocksource *clock; do { seq = read_seqbegin(&xtime_lock); /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + clock->shift); - *ts = clock->raw_time; + *ts = raw_time; } while (read_seqretry(&xtime_lock, seq)); @@ -413,7 +477,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqbegin(&xtime_lock); - ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqretry(&xtime_lock, seq)); @@ -439,6 +503,7 @@ unsigned long __attribute__((weak)) read_persistent_clock(void) */ void __init timekeeping_init(void) { + struct clocksource *clock; unsigned long flags; unsigned long sec = read_persistent_clock(); @@ -451,11 +516,13 @@ void __init timekeeping_init(void) clock->enable(clock); /* set mult_orig on enable */ clock->mult_orig = clock->mult; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clock->read(clock); + + timekeeper_setup_internals(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; + raw_time.tv_sec = 0; + raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); @@ -492,8 +559,8 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = clock->read(clock); - clock->error = 0; + timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); + timekeeper.ntp_error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -514,7 +581,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) timekeeping_suspend_time = read_persistent_clock(); write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); timekeeping_suspended = 1; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -549,7 +616,7 @@ device_initcall(timekeeping_init_device); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. 
*/ -static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, +static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, s64 *offset) { s64 tick_error, i; @@ -565,7 +632,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ - error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -574,8 +641,9 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); - tick_error -= clock->xtime_interval >> 1; + tick_error = tick_length >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift + 1); + tick_error -= timekeeper.xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; /* Finally calculate the adjustment shift value. */ @@ -600,18 +668,19 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void clocksource_adjust(s64 offset) +static void timekeeping_adjust(s64 offset) { - s64 error, interval = clock->cycle_interval; + s64 error, interval = timekeeper.cycle_interval; int adj; - error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); + error = timekeeper.ntp_error >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift - 1); if (error > interval) { error >>= 2; if (likely(error <= interval)) adj = 1; else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else if (error < -interval) { error >>= 2; if (likely(error >= -interval)) { @@ -619,15 +688,15 @@ static void clocksource_adjust(s64 offset) interval = -interval; offset = -offset; } else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else return; - clock->mult += adj; - clock->xtime_interval += interval; - clock->xtime_nsec -= offset; - clock->error -= (interval - offset) << - (NTP_SCALE_SHIFT - clock->shift); + timekeeper.clock->mult += adj; + timekeeper.xtime_interval += interval; + timekeeper.xtime_nsec -= offset; + timekeeper.ntp_error -= (interval - offset) << + (NTP_SCALE_SHIFT - timekeeper.clock->shift); } /** @@ -637,53 +706,59 @@ static void clocksource_adjust(s64 offset) */ void update_wall_time(void) { + struct clocksource *clock; cycle_t offset; + s64 nsecs; /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) return; + clock = timekeeper.clock; #ifdef CONFIG_GENERIC_TIME offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else - offset = clock->cycle_interval; + offset = timekeeper.cycle_interval; #endif - clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; + timekeeper.xtime_nsec = (s64)xtime.tv_nsec << clock->shift; /* normally this loop will run just once, however in the * case of lost or late ticks, it will accumulate correctly. 
*/ - while (offset >= clock->cycle_interval) { + while (offset >= timekeeper.cycle_interval) { + u64 nsecps = (u64)NSEC_PER_SEC << clock->shift; + /* accumulate one interval */ - offset -= clock->cycle_interval; - clock->cycle_last += clock->cycle_interval; + offset -= timekeeper.cycle_interval; + clock->cycle_last += timekeeper.cycle_interval; - clock->xtime_nsec += clock->xtime_interval; - if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { - clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; + timekeeper.xtime_nsec += timekeeper.xtime_interval; + if (timekeeper.xtime_nsec >= nsecps) { + timekeeper.xtime_nsec -= nsecps; xtime.tv_sec++; second_overflow(); } - clock->raw_time.tv_nsec += clock->raw_interval; - if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { - clock->raw_time.tv_nsec -= NSEC_PER_SEC; - clock->raw_time.tv_sec++; + raw_time.tv_nsec += timekeeper.raw_interval; + if (raw_time.tv_nsec >= NSEC_PER_SEC) { + raw_time.tv_nsec -= NSEC_PER_SEC; + raw_time.tv_sec++; } /* accumulate error between NTP and clock interval */ - clock->error += tick_length; - clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); + timekeeper.ntp_error += tick_length; + timekeeper.ntp_error -= timekeeper.xtime_interval << + (NTP_SCALE_SHIFT - clock->shift); } /* correct the clock when NTP error is too big */ - clocksource_adjust(offset); + timekeeping_adjust(offset); /* * Since in the loop above, we accumulate any amount of time * in xtime_nsec over a second into xtime.tv_sec, its possible for * xtime_nsec to be fairly small after the loop. Further, if we're - * slightly speeding the clocksource up in clocksource_adjust(), + * slightly speeding the clocksource up in timekeeping_adjust(), * its possible the required corrective factor to xtime_nsec could * cause it to underflow. * @@ -695,24 +770,26 @@ void update_wall_time(void) * We'll correct this error next time through this function, when * xtime_nsec is not as small. */ - if (unlikely((s64)clock->xtime_nsec < 0)) { - s64 neg = -(s64)clock->xtime_nsec; - clock->xtime_nsec = 0; - clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); + if (unlikely((s64)timekeeper.xtime_nsec < 0)) { + s64 neg = -(s64)timekeeper.xtime_nsec; + timekeeper.xtime_nsec = 0; + timekeeper.ntp_error += neg << (NTP_SCALE_SHIFT - clock->shift); } /* store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ - xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; - clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; - clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); + xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> clock->shift) + 1; + timekeeper.xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; + timekeeper.ntp_error += timekeeper.xtime_nsec << + (NTP_SCALE_SHIFT - clock->shift); - update_xtime_cache(cyc2ns(clock, offset)); + nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ change_clocksource(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } /** -- cgit v1.2.3 From 0a54419836254a27baecd9037103171bcbabaf67 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:28 +0200 Subject: timekeeping: Move NTP adjusted clock multiplier to struct timekeeper The clocksource structure has two multipliers, the unmodified multiplier clock->mult_orig and the NTP corrected multiplier clock->mult. 
The NTP multiplier is misplaced in the struct clocksource, this is private information of the timekeeping code. Add the mult field to the struct timekeeper to contain the NTP corrected value, keep the unmodified multiplier in clock->mult and remove clock->mult_orig. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.149047645@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/arm/plat-omap/common.c | 7 ++---- include/linux/clocksource.h | 4 +--- kernel/time/timekeeping.c | 53 ++++++++++++++++++++------------------------- 3 files changed, 27 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index ebcf006406f9..95587b6c0259 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -253,11 +253,8 @@ static struct clocksource clocksource_32k = { */ unsigned long long sched_clock(void) { - unsigned long long ret; - - ret = (unsigned long long)clocksource_32k.read(&clocksource_32k); - ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift; - return ret; + return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), + clocksource_32k.mult, clocksource_32k.shift); } static int __init omap_init_clocksource_32k(void) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e12e3095e2fb..e34015effeb6 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -149,8 +149,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @disable: optional function to disable the clocksource * @mask: bitmask for two's complement * subtraction of non 64 bit counters - * @mult: cycle to nanosecond multiplier (adjusted by NTP) - * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP) + * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @flags: flags describing special properties * @vread: vsyscall based read @@ -168,7 +167,6 @@ struct clocksource { void (*disable)(struct clocksource *cs); cycle_t mask; u32 mult; - u32 mult_orig; u32 shift; unsigned long flags; cycle_t (*vread)(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dfdab1cefe1e..f4056f6c2632 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -41,6 +41,8 @@ struct timekeeper { /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ int ntp_error_shift; + /* NTP adjusted clock multiplier */ + u32 mult; }; struct timekeeper timekeeper; @@ -66,8 +68,8 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; - tmp += clock->mult_orig/2; - do_div(tmp, clock->mult_orig); + tmp += clock->mult/2; + do_div(tmp, clock->mult); if (tmp == 0) tmp = 1; @@ -77,13 +79,20 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Go back from cycles -> shifted ns */ timekeeper.xtime_interval = (u64) interval * clock->mult; timekeeper.raw_interval = - ((u64) interval * clock->mult_orig) >> clock->shift; + ((u64) interval * clock->mult) >> clock->shift; timekeeper.xtime_nsec = 0; timekeeper.shift = clock->shift; timekeeper.ntp_error = 0; timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + + /* + * The timekeeper keeps its own mult values for the currently + * active clocksource. These values will be adjusted via NTP + * to counteract clock drifting.
+ */ + timekeeper.mult = clock->mult; } /* @@ -154,14 +163,15 @@ static void timekeeping_forward_now(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); timespec_add_ns(&raw_time, nsec); } @@ -193,8 +203,8 @@ void getnstimeofday(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -228,8 +238,8 @@ ktime_t ktime_get(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs += clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -271,8 +281,8 @@ void ktime_get_ts(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); @@ -356,22 +366,10 @@ static void change_clocksource(void) if (new->enable && !new->enable(new)) return; - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - new->mult_orig = new->mult; old = timekeeper.clock; timekeeper_setup_internals(new); - /* - * Save mult_orig in mult so that the value can be restored - * regardless if ->enable() updates the value of mult or not. 
- */ - old->mult = old->mult_orig; if (old->disable) old->disable(old); @@ -461,7 +459,7 @@ void getrawmonotonic(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); *ts = raw_time; @@ -521,9 +519,6 @@ void __init timekeeping_init(void) clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - /* set mult_orig on enable */ - clock->mult_orig = clock->mult; - timekeeper_setup_internals(clock); xtime.tv_sec = sec; @@ -697,7 +692,7 @@ static void timekeeping_adjust(s64 offset) } else return; - timekeeper.clock->mult += adj; + timekeeper.mult += adj; timekeeper.xtime_interval += interval; timekeeper.xtime_nsec -= offset; timekeeper.ntp_error -= (interval - offset) << @@ -789,7 +784,7 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; - nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ -- cgit v1.2.3 From 75c5158f70c065b9704b924503d96e8297838f79 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:30 +0200 Subject: timekeeping: Update clocksource with stop_machine update_wall_time calls change_clocksource HZ times per second to check if a new clock source is available. In close to 100% of all calls there is no new clock. Replace the tick based check by an update done with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.711836357@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 112 +++++++++++++++++--------------------------- kernel/time/timekeeping.c | 41 ++++++++++------ 3 files changed, 72 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e34015effeb6..9ea40ff26f0e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,4 +291,6 @@ static inline void update_vsyscall_tz(void) } #endif +extern void timekeeping_notify(struct clocksource *clock); + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f18c9a6bdcf4..a1657b5fdeb9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -109,35 +109,17 @@ EXPORT_SYMBOL(timecounter_cyc2time); /*[Clocksource internal variables]--------- * curr_clocksource: * currently selected clocksource. - * next_clocksource: - * pending next selected clocksource. * clocksource_list: * linked list with the registered clocksources - * clocksource_lock: - * protects manipulations to curr_clocksource and next_clocksource - * and the clocksource_list + * clocksource_mutex: + * protects manipulations to curr_clocksource and the clocksource_list * override_name: * Name of the user-specified clocksource. 
*/ static struct clocksource *curr_clocksource; -static struct clocksource *next_clocksource; static LIST_HEAD(clocksource_list); -static DEFINE_SPINLOCK(clocksource_lock); +static DEFINE_MUTEX(clocksource_mutex); static char override_name[32]; -static int finished_booting; - -/* clocksource_done_booting - Called near the end of core bootup - * - * Hack to avoid lots of clocksource churn at boot time. - * We use fs_initcall because we want this to start before - * device_initcall but after subsys_initcall. - */ -static int __init clocksource_done_booting(void) -{ - finished_booting = 1; - return 0; -} -fs_initcall(clocksource_done_booting); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static LIST_HEAD(watchdog_list); @@ -356,18 +338,16 @@ static inline void clocksource_resume_watchdog(void) { } void clocksource_resume(void) { struct clocksource *cs; - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); - list_for_each_entry(cs, &clocksource_list, list) { + list_for_each_entry(cs, &clocksource_list, list) if (cs->resume) cs->resume(); - } clocksource_resume_watchdog(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } /** @@ -383,28 +363,13 @@ void clocksource_touch_watchdog(void) } #ifdef CONFIG_GENERIC_TIME -/** - * clocksource_get_next - Returns the selected clocksource - * - */ -struct clocksource *clocksource_get_next(void) -{ - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); - if (next_clocksource && finished_booting) { - curr_clocksource = next_clocksource; - next_clocksource = NULL; - } - spin_unlock_irqrestore(&clocksource_lock, flags); - - return curr_clocksource; -} +static int finished_booting; /** * clocksource_select - Select the best clocksource available * - * Private function. Must hold clocksource_lock when called. + * Private function. Must hold clocksource_mutex when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. @@ -413,7 +378,7 @@ static void clocksource_select(void) { struct clocksource *best, *cs; - if (list_empty(&clocksource_list)) + if (!finished_booting || list_empty(&clocksource_list)) return; /* First clocksource on the list has the best rating. */ best = list_first_entry(&clocksource_list, struct clocksource, list); @@ -438,13 +403,31 @@ static void clocksource_select(void) best = cs; break; } - if (curr_clocksource != best) - next_clocksource = best; + if (curr_clocksource != best) { + printk(KERN_INFO "Switching to clocksource %s\n", best->name); + curr_clocksource = best; + timekeeping_notify(curr_clocksource); + } } +/* + * clocksource_done_booting - Called near the end of core bootup + * + * Hack to avoid lots of clocksource churn at boot time. + * We use fs_initcall because we want this to start before + * device_initcall but after subsys_initcall. 
+ */ +static int __init clocksource_done_booting(void) +{ + finished_booting = 1; + clocksource_select(); + return 0; +} +fs_initcall(clocksource_done_booting); + #else /* CONFIG_GENERIC_TIME */ -static void clocksource_select(void) { } +static inline void clocksource_select(void) { } #endif @@ -471,13 +454,11 @@ static void clocksource_enqueue(struct clocksource *cs) */ int clocksource_register(struct clocksource *cs) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); clocksource_enqueue_watchdog(cs); + mutex_unlock(&clocksource_mutex); return 0; } EXPORT_SYMBOL(clocksource_register); @@ -487,14 +468,12 @@ EXPORT_SYMBOL(clocksource_register); */ void clocksource_change_rating(struct clocksource *cs, int rating) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_change_rating); @@ -503,13 +482,11 @@ EXPORT_SYMBOL(clocksource_change_rating); */ void clocksource_unregister(struct clocksource *cs) { - unsigned long flags; - + mutex_lock(&clocksource_mutex); clocksource_dequeue_watchdog(cs); - spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_unregister); @@ -527,9 +504,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, { ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return count; } @@ -557,14 +534,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (buf[count-1] == '\n') count--; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; clocksource_select(); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return ret; } @@ -584,7 +561,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, struct clocksource *src; ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); list_for_each_entry(src, &clocksource_list, list) { /* * Don't show non-HRES clocksource if the tick code is @@ -596,7 +573,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "%s ", src->name); } - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); count += snprintf(buf + count, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); @@ -651,11 +628,10 @@ device_initcall(init_clocksource_sysfs); */ static int __init boot_override_clocksource(char* str) { - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); if (str) strlcpy(override_name, str, sizeof(override_name)); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); return 1; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 27ae01b596b7..41579e7fcf9d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -18,6 +18,7 @@ #include #include #include 
+#include <linux/stop_machine.h> /* Structure holding internal timekeeping values. */ struct timekeeper { @@ -179,6 +180,7 @@ void timekeeping_leap_insert(int leapsecond) } #ifdef CONFIG_GENERIC_TIME + /** * timekeeping_forward_now - update clock to the current time * @@ -351,31 +353,40 @@ EXPORT_SYMBOL(do_settimeofday); * * Accumulates current time interval and initializes new clocksource */ -static void change_clocksource(void) +static int change_clocksource(void *data) { struct clocksource *new, *old; - new = clocksource_get_next(); - - if (!new || timekeeper.clock == new) - return; + new = (struct clocksource *) data; timekeeping_forward_now(); + if (!new->enable || new->enable(new) == 0) { + old = timekeeper.clock; + timekeeper_setup_internals(new); + if (old->disable) + old->disable(old); + } + return 0; +} - if (new->enable && !new->enable(new)) +/** + * timekeeping_notify - Install a new clock source + * @clock: pointer to the clock source + * + * This function is called from clocksource.c after a new, better clock + * source has been registered. The caller holds the clocksource_mutex. + */ +void timekeeping_notify(struct clocksource *clock) +{ + if (timekeeper.clock == clock) return; - - old = timekeeper.clock; - timekeeper_setup_internals(new); - - if (old->disable) - old->disable(old); - + stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); } + #else /* GENERIC_TIME */ + static inline void timekeeping_forward_now(void) { } -static inline void change_clocksource(void) { } /** * ktime_get - get the monotonic time in ktime_t format @@ -416,6 +427,7 @@ void ktime_get_ts(struct timespec *ts) ts->tv_nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); + #endif /* !GENERIC_TIME */ /** @@ -773,7 +785,6 @@ void update_wall_time(void) update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ - change_clocksource(); update_vsyscall(&xtime, timekeeper.clock); } -- cgit v1.2.3 From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) has a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system, change the read_persistent_clock function to return a struct timespec.
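Every architecture is converted with the same mechanical pattern; a minimal sketch, assuming a hypothetical per-arch RTC accessor arch_rtc_read_seconds() in place of the various mc146818/xicor/TOD helpers:

void read_persistent_clock(struct timespec *ts)
{
	/* The old interface returned whole seconds; that value becomes tv_sec. */
	ts->tv_sec = arch_rtc_read_seconds();
	/*
	 * Clocks with sub-second resolution (e.g. the s390 TOD clock)
	 * can now report it here instead of truncating to seconds.
	 */
	ts->tv_nsec = 0;
}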
Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/m68knommu/kernel/time.c | 5 ++-- arch/mips/dec/time.c | 5 ++-- arch/mips/lasat/ds1603.c | 5 ++-- arch/mips/lasat/sysctl.c | 8 ++++-- arch/mips/lemote/lm2e/setup.c | 5 ++-- arch/mips/mti-malta/malta-time.c | 5 ++-- arch/mips/pmc-sierra/yosemite/setup.c | 5 ++-- arch/mips/sibyte/swarm/setup.c | 15 +++++++--- arch/mips/sni/time.c | 5 ++-- arch/powerpc/kernel/time.c | 7 +++-- arch/s390/kernel/time.c | 22 +++------------ arch/sh/kernel/time.c | 6 ++-- arch/x86/kernel/rtc.c | 5 ++-- arch/xtensa/kernel/time.c | 5 ++-- include/linux/time.h | 2 +- kernel/time/timekeeping.c | 52 +++++++++++++++++++---------------- 16 files changed, 83 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f72211..68432248515c 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -72,9 +72,10 @@ static unsigned long read_rtc_mmss(void) return mktime(year, mon, day, hour, min, sec);; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return read_rtc_mmss(); + ts->tv_sec = read_rtc_mmss(); + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index 463136e6685a..02f505f23c32 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -18,7 +18,7 @@ #include #include -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, mon, day, hour, min, sec, real_year; unsigned long flags; @@ -53,7 +53,8 @@ unsigned long read_persistent_clock(void) year += real_year - 72 + 2000; - return mktime(year, mon, day, hour, min, sec); + ts->tv_sec = mktime(year, mon, day, hour, min, sec); + ts->tv_nsec = 0; } /* diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c index 52cb1436a12a..c6fd96ff118d 100644 --- a/arch/mips/lasat/ds1603.c +++ b/arch/mips/lasat/ds1603.c @@ -135,7 +135,7 @@ static void rtc_end_op(void) lasat_ndelay(1000); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long word; unsigned long flags; @@ -147,7 +147,8 @@ unsigned long read_persistent_clock(void) rtc_end_op(); spin_unlock_irqrestore(&rtc_lock, flags); - return word; + ts->tv_sec = word; + ts->tv_nsec = 0; } int rtc_mips_set_mmss(unsigned long time) diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 8f88886feb12..3f04d4c406b7 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -92,10 +92,12 @@ static int rtctmp; int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, loff_t *ppos) { + struct timespec ts; int r; if (!write) { - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; /* check for time < 0 and set to 0 */ if (rtctmp < 0) rtctmp = 0; @@ -134,9 +136,11 @@ int sysctl_lasat_rtc(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { + struct timespec ts; int r; - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; if (rtctmp < 0) rtctmp = 0; r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index ebd6ceaef2fd..24b355df6127 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ 
b/arch/mips/lemote/lm2e/setup.c @@ -54,9 +54,10 @@ void __init plat_time_init(void) mips_hpt_frequency = cpu_clock_freq / 2; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } void (*__wbflush)(void); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 0b97d47691fc..3c6f190aa61c 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -100,9 +100,10 @@ static unsigned int __init estimate_cpu_frequency(void) return count; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } static void __init plat_perf_setup(void) diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 2d3c0dca275d..3498ac9c35af 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -70,7 +70,7 @@ void __init bus_error_init(void) } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, month, day, hour, min, sec; unsigned long flags; @@ -92,7 +92,8 @@ unsigned long read_persistent_clock(void) m48t37_base->control = 0x00; spin_unlock_irqrestore(&rtc_lock, flags); - return mktime(year, month, day, hour, min, sec); + ts->tv_sec = mktime(year, month, day, hour, min, sec); + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long tim) diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 672e45d495a9..623ffc933c4c 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -87,19 +87,26 @@ enum swarm_rtc_type { enum swarm_rtc_type swarm_rtc_type; -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { + unsigned long sec; + switch (swarm_rtc_type) { case RTC_XICOR: - return xicor_get_time(); + sec = xicor_get_time(); + break; case RTC_M4LT81: - return m41t81_get_time(); + sec = m41t81_get_time(); + break; case RTC_NONE: default: - return mktime(2000, 1, 1, 0, 0, 0); + sec = mktime(2000, 1, 1, 0, 0, 0); + break; } + ts->tv_sec = sec; + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long sec) diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 0d9ec1a5c24a..62df6a598e0a 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -182,7 +182,8 @@ void __init plat_time_init(void) setup_pit_timer(); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return -1; + ts->tv_sec = -1; + ts->tv_nsec = 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..ad63f30fe3da 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -769,7 +769,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; @@ -787,8 +787,9 @@ unsigned long read_persistent_clock(void) if (!ppc_md.get_rtc_time) return; ppc_md.get_rtc_time(&tm); - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_nsec = 0; } /* clocksource code */ diff --git a/arch/s390/kernel/time.c
b/arch/s390/kernel/time.c index e76c2e7a8b9a..a94ec48587b4 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -182,12 +182,9 @@ static void timing_alert_interrupt(__u16 code) static void etr_reset(void); static void stp_reset(void); -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec ts; - - tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); - return ts.tv_sec; + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) @@ -248,7 +245,6 @@ void __init time_init(void) { struct timespec ts; unsigned long flags; - cycle_t now; /* Reset time synchronization interfaces. */ etr_reset(); stp_reset(); @@ -266,20 +262,10 @@ void __init time_init(void) panic("Could not register TOD clock source"); /* - * The TOD clock is an accurate clock. The xtime should be - * initialized in a way that the difference between TOD and - * xtime is reasonably small. Too bad that timekeeping_init - * sets xtime.tv_nsec to zero. In addition the clock source - * change from the jiffies clock source to the TOD clock - * source add another error of up to 1/HZ second. The same - * function sets wall_to_monotonic to a value that is too - * small for /proc/uptime to be accurate. - * Reset xtime and wall_to_monotonic to sane values. + * Reset wall_to_monotonic to the initial timestamp created + * in head.S to get a precise value in /proc/uptime. */ write_seqlock_irqsave(&xtime_lock, flags); - now = get_clock(); - tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); - clocksource_tod.cycle_last = now; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 9b352a1e3fb4..3f4706aa975e 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -39,11 +39,9 @@ void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time; int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; #ifdef CONFIG_GENERIC_CMOS_UPDATE -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec tv; - rtc_sh_get_time(&tv); - return tv.tv_sec; + rtc_sh_get_time(ts); } int update_persistent_clock(struct timespec now) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e72..bf67dcb4a44c 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime) } /* not static: needed by APM */ -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long retval, flags; @@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void) retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); - return retval; + ts->tv_sec = retval; + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 8848120d291b..19085ff0484a 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -59,9 +59,8 @@ static struct irqaction timer_irqaction = { void __init time_init(void) { - xtime.tv_nsec = 0; - xtime.tv_sec = read_persistent_clock(); - + /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init.
*/ + read_persistent_clock(&xtime); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); diff --git a/include/linux/time.h b/include/linux/time.h index e7c844558884..53a3216f0d1b 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -101,7 +101,7 @@ extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; -extern unsigned long read_persistent_clock(void); +extern void read_persistent_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 41579e7fcf9d..f1a21ce491e6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -154,7 +154,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); */ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); -static unsigned long total_sleep_time; /* seconds */ +static struct timespec total_sleep_time; /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. @@ -487,17 +487,18 @@ int timekeeping_valid_for_hres(void) } /** - * read_persistent_clock - Return time in seconds from the persistent clock. + * read_persistent_clock - Return time from the persistent clock. * * Weak dummy function for arches that do not yet support it. - * Returns seconds from epoch using the battery backed persistent clock. - * Returns zero if unsupported. + * Reads the time from the battery backed persistent clock. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. * * XXX - Do be sure to remove it once all arches implement it. */ -unsigned long __attribute__((weak)) read_persistent_clock(void) +void __attribute__((weak)) read_persistent_clock(struct timespec *ts) { - return 0; + ts->tv_sec = 0; + ts->tv_nsec = 0; } /* @@ -507,7 +508,9 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - unsigned long sec = read_persistent_clock(); + struct timespec now; + + read_persistent_clock(&now); write_seqlock_irqsave(&xtime_lock, flags); @@ -518,19 +521,20 @@ void __init timekeeping_init(void) clock->enable(clock); timekeeper_setup_internals(clock); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; + xtime.tv_sec = now.tv_sec; + xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); - total_sleep_time = 0; + total_sleep_time.tv_sec = 0; + total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); } /* time in seconds when suspend began */ -static unsigned long timekeeping_suspend_time; +static struct timespec timekeeping_suspend_time; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. 
@@ -543,18 +547,19 @@ static unsigned long timekeeping_suspend_time; static int timekeeping_resume(struct sys_device *dev) { unsigned long flags; - unsigned long now = read_persistent_clock(); + struct timespec ts; + + read_persistent_clock(&ts); clocksource_resume(); write_seqlock_irqsave(&xtime_lock, flags); - if (now && (now > timekeeping_suspend_time)) { - unsigned long sleep_length = now - timekeeping_suspend_time; - - xtime.tv_sec += sleep_length; - wall_to_monotonic.tv_sec -= sleep_length; - total_sleep_time += sleep_length; + if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { + ts = timespec_sub(ts, timekeeping_suspend_time); + xtime = timespec_add_safe(xtime, ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); + total_sleep_time = timespec_add_safe(total_sleep_time, ts); } update_xtime_cache(0); /* re-base the last cycle value */ @@ -577,7 +582,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; - timekeeping_suspend_time = read_persistent_clock(); + read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&xtime_lock, flags); timekeeping_forward_now(); @@ -801,9 +806,10 @@ void update_wall_time(void) */ void getboottime(struct timespec *ts) { - set_normalized_timespec(ts, - - (wall_to_monotonic.tv_sec + total_sleep_time), - - wall_to_monotonic.tv_nsec); + struct timespec boottime; + + boottime = timespec_add_safe(wall_to_monotonic, total_sleep_time); + set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } /** @@ -812,7 +818,7 @@ void getboottime(struct timespec *ts) */ void monotonic_to_bootbased(struct timespec *ts) { - ts->tv_sec += total_sleep_time; + *ts = timespec_add_safe(*ts, total_sleep_time); } unsigned long get_seconds(void) -- cgit v1.2.3 From 23970e389e9cee43c4b41023935e1417271708b2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:32 +0200 Subject: timekeeping: Introduce read_boot_clock Add the new function read_boot_clock to get the exact time the system has been started. For architectures without support for exact boot time a new weak function is added that returns 0. Use the exact boot time to initialize wall_to_monotonic, or xtime if the read_boot_clock returned 0. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.296703241@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 17 +++++------------ include/linux/time.h | 1 + kernel/time/timekeeping.c | 24 ++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a94ec48587b4..6bff1a1d9060 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -187,6 +187,11 @@ void read_persistent_clock(struct timespec *ts) tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } +void read_boot_clock(struct timespec *ts) +{ + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); +} + static cycle_t read_tod_clock(struct clocksource *cs) { return get_clock(); @@ -243,9 +248,6 @@ void update_vsyscall_tz(void) */ void __init time_init(void) { - struct timespec ts; - unsigned long flags; - /* Reset time synchronization interfaces. 
*/ etr_reset(); stp_reset(); @@ -261,15 +263,6 @@ void __init time_init(void) if (clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); - /* - * Reset wall_to_monotonic to the initial timestamp created - * in head.S to get a precise value in /proc/uptime. - */ - write_seqlock_irqsave(&xtime_lock, flags); - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); - set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); - /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); diff --git a/include/linux/time.h b/include/linux/time.h index 53a3216f0d1b..f505988398e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -102,6 +102,7 @@ extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; extern void read_persistent_clock(struct timespec *ts); +extern void read_boot_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f1a21ce491e6..15e06defca55 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -501,6 +501,21 @@ void __attribute__((weak)) read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +/** + * read_boot_clock - Return time of the system start. + * + * Weak dummy function for arches that do not yet support it. + * Function to read the exact time the system has been started. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. + * + * XXX - Do be sure to remove it once all arches implement it. + */ +void __attribute__((weak)) read_boot_clock(struct timespec *ts) +{ + ts->tv_sec = 0; + ts->tv_nsec = 0; +} + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -508,9 +523,10 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - struct timespec now; + struct timespec now, boot; read_persistent_clock(&now); + read_boot_clock(&boot); write_seqlock_irqsave(&xtime_lock, flags); @@ -525,8 +541,12 @@ void __init timekeeping_init(void) xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; + if (boot.tv_sec == 0 && boot.tv_nsec == 0) { + boot.tv_sec = xtime.tv_sec; + boot.tv_nsec = xtime.tv_nsec; + } set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + -boot.tv_sec, -boot.tv_nsec); update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; -- cgit v1.2.3 From 0ccff1a49def92d6b838a6da166c89004b3a4d0c Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 17 Aug 2009 22:38:04 -0400 Subject: jbd2: bitfields should be unsigned This fixes sparse noise: error: dubious one-bit signed bitfield Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" Cc: Jan Kara --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d97eb652d6ca..52695d3dfd0b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -652,7 +652,7 @@ struct transaction_s * This transaction is being forced and some process is * waiting for it to finish. 
*/ - int t_synchronous_commit:1; + unsigned int t_synchronous_commit:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From 776f3360de6ed246e973577828f725681120fd7a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 19 Aug 2009 15:56:37 +1000 Subject: drm: fixup includes in encoder slave header files. Signed-off-by: Dave Airlie --- include/drm/drm_encoder_slave.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h index 821ec40c17d8..e5e5c94ca92c 100644 --- a/include/drm/drm_encoder_slave.h +++ b/include/drm/drm_encoder_slave.h @@ -27,8 +27,8 @@ #ifndef __DRM_ENCODER_SLAVE_H__ #define __DRM_ENCODER_SLAVE_H__ -#include -#include +#include "drmP.h" +#include "drm_crtc.h" /** * struct drm_encoder_slave_funcs - Entry points exposed by a slave encoder driver -- cgit v1.2.3 From 53bd83899f5ba6b0da8f5ef976129273854a72d4 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 1 Jul 2009 10:04:40 -0700 Subject: drm: clarify scaling property names Now that we're using the scaling property in the Intel driver I noticed that the names were a bit confusing. I've corrected them according to our discussion on IRC and the mailing list, though I've left out potential new additions for a new scaling property with an integer (or two) for the scaling factor. None of the drivers implement that today, but if someone wants to do it, I think it could be done with the addition of a single new type and a new property to describe the scaling factor in the X and Y directions. Signed-off-by: Jesse Barnes Acked-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 8 ++++---- drivers/gpu/drm/i915/intel_lvds.c | 14 +++----------- include/drm/drm_mode.h | 9 +++++---- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 362a538cdedc..39a6bc69d223 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -68,10 +68,10 @@ DRM_ENUM_NAME_FN(drm_get_dpms_name, drm_dpms_enum_list) */ static struct drm_prop_enum_list drm_scaling_mode_enum_list[] = { - { DRM_MODE_SCALE_NON_GPU, "Non-GPU" }, - { DRM_MODE_SCALE_FULLSCREEN, "Fullscreen" }, - { DRM_MODE_SCALE_NO_SCALE, "No scale" }, - { DRM_MODE_SCALE_ASPECT, "Aspect" }, + { DRM_MODE_SCALE_NONE, "None" }, + { DRM_MODE_SCALE_FULLSCREEN, "Full" }, + { DRM_MODE_SCALE_CENTER, "Center" }, + { DRM_MODE_SCALE_ASPECT, "Full aspect" }, }; static struct drm_prop_enum_list drm_dithering_mode_enum_list[] = diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b59c65d19d81..5df486fbe056 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -38,14 +38,6 @@ #include "i915_drv.h" #include -/* - * the following four scaling options are defined. - * #define DRM_MODE_SCALE_NON_GPU 0 - * #define DRM_MODE_SCALE_FULLSCREEN 1 - * #define DRM_MODE_SCALE_NO_SCALE 2 - * #define DRM_MODE_SCALE_ASPECT 3 - */ - /* Private structure for the integrated LVDS support */ struct intel_lvds_priv { int fitting_mode; @@ -334,7 +326,7 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder, I915_WRITE(BCLRPAT_B, 0); switch (lvds_priv->fitting_mode) { - case DRM_MODE_SCALE_NO_SCALE: + case DRM_MODE_SCALE_CENTER: /* * For centered modes, we have to calculate border widths & * heights and modify the values programmed into the CRTC. 
@@ -670,8 +662,8 @@ static int intel_lvds_set_property(struct drm_connector *connector, connector->encoder) { struct drm_crtc *crtc = connector->encoder->crtc; struct intel_lvds_priv *lvds_priv = intel_output->dev_priv; - if (value == DRM_MODE_SCALE_NON_GPU) { - DRM_DEBUG_KMS("non_GPU property is unsupported\n"); + if (value == DRM_MODE_SCALE_NONE) { + DRM_DEBUG_KMS("no scaling not supported\n"); return 0; } if (lvds_priv->fitting_mode == value) { diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 616aeb42b773..1f908416aedb 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -68,10 +68,11 @@ #define DRM_MODE_DPMS_OFF 3 /* Scaling mode options */ -#define DRM_MODE_SCALE_NON_GPU 0 -#define DRM_MODE_SCALE_FULLSCREEN 1 -#define DRM_MODE_SCALE_NO_SCALE 2 -#define DRM_MODE_SCALE_ASPECT 3 +#define DRM_MODE_SCALE_NONE 0 /* Unmodified timing (display or + software can still scale) */ +#define DRM_MODE_SCALE_FULLSCREEN 1 /* Full screen, ignore aspect */ +#define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ +#define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 -- cgit v1.2.3 From 949ef70e2d1a5c12178875f513df34fc85d91a38 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 17 Aug 2009 19:49:19 +0300 Subject: drm/kms: no need to return void value (encoder) Cc: Francisco Jerez Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- include/drm/drm_encoder_slave.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h index e5e5c94ca92c..2f65633d28a7 100644 --- a/include/drm/drm_encoder_slave.h +++ b/include/drm/drm_encoder_slave.h @@ -154,7 +154,7 @@ static inline int drm_i2c_encoder_register(struct module *owner, */ static inline void drm_i2c_encoder_unregister(struct drm_i2c_encoder_driver *driver) { - return i2c_del_driver(&driver->i2c_driver); + i2c_del_driver(&driver->i2c_driver); } void drm_i2c_encoder_destroy(struct drm_encoder *encoder); -- cgit v1.2.3 From a0724fcf829e5afb66159ef68cb16a805ea11b42 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 17 Aug 2009 01:18:38 +0300 Subject: drm/ttm: optimize bo_kmap_type values A micro-optimization on the function ttm_kmap_obj_virtual(). By defining the values of enum ttm_bo_kmap_obj::bo_kmap_type to have a bit indicating iomem, size of the function ttm_kmap_obj_virtual() will be reduced by 16 bytes on x86_64 (gcc 4.1.2). ttm_kmap_obj_virtual() may be heavily used, when buffer objects are accessed via wrappers, that work for both kinds of memory addresses: iomem cookies and kernel virtual. Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_bo_api.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index cd22ab4b495c..99dc521aa1a9 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -245,14 +245,15 @@ struct ttm_buffer_object { * premapped region. 
*/ +#define TTM_BO_MAP_IOMEM_MASK 0x80 struct ttm_bo_kmap_obj { void *virtual; struct page *page; enum { - ttm_bo_map_iomap, - ttm_bo_map_vmap, - ttm_bo_map_kmap, - ttm_bo_map_premapped, + ttm_bo_map_iomap = 1 | TTM_BO_MAP_IOMEM_MASK, + ttm_bo_map_vmap = 2, + ttm_bo_map_kmap = 3, + ttm_bo_map_premapped = 4 | TTM_BO_MAP_IOMEM_MASK, } bo_kmap_type; }; @@ -522,8 +523,7 @@ extern int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type); static inline void *ttm_kmap_obj_virtual(struct ttm_bo_kmap_obj *map, bool *is_iomem) { - *is_iomem = (map->bo_kmap_type == ttm_bo_map_iomap || - map->bo_kmap_type == ttm_bo_map_premapped); + *is_iomem = !!(map->bo_kmap_type & TTM_BO_MAP_IOMEM_MASK); return map->virtual; } -- cgit v1.2.3 From 327c225bd548bf7871f116a0baa5ebdac884e452 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 17 Aug 2009 16:28:37 +0200 Subject: drm: Enable drm drivers to add drm sysfs devices. Export utility functions for drivers to add specialized devices in the sysfs drm class subdirectory. Initially this will be needed for TTM to add a virtual device that handles power management. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_sysfs.c | 25 +++++++++++++++++++++++++ include/drm/drm_sysfs.h | 12 ++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 include/drm/drm_sysfs.h (limited to 'include') diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index adc179459c25..de154556c405 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -16,6 +16,7 @@ #include #include +#include "drm_sysfs.h" #include "drm_core.h" #include "drmP.h" @@ -515,3 +516,27 @@ void drm_sysfs_device_remove(struct drm_minor *minor) { device_unregister(&minor->kdev); } + + +/** + * drm_class_device_register - Register a struct device in the drm class. + * + * @dev: pointer to struct device to register. + * + * @dev should have all relevant members pre-filled with the exception + * of the class member. In particular, the device_type member must + * be set. + */ + +int drm_class_device_register(struct device *dev) +{ + dev->class = drm_class; + return device_register(dev); +} +EXPORT_SYMBOL_GPL(drm_class_device_register); + +void drm_class_device_unregister(struct device *dev) +{ + return device_unregister(dev); +} +EXPORT_SYMBOL_GPL(drm_class_device_unregister); diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h new file mode 100644 index 000000000000..1d8e033fde67 --- /dev/null +++ b/include/drm/drm_sysfs.h @@ -0,0 +1,12 @@ +#ifndef _DRM_SYSFS_H_ +#define _DRM_SYSFS_H_ + +/** + * This minimalistic include file is intended for users (read TTM) that + * don't want to include the full drmP.h file. + */ + +extern int drm_class_device_register(struct device *dev); +extern void drm_class_device_unregister(struct device *dev); + +#endif -- cgit v1.2.3 From 5fd9cbad3a4ae82c83c55b9c621d156c326724ef Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 17 Aug 2009 16:28:39 +0200 Subject: drm/ttm: Memory accounting rework. Use inclusive zones to simplify accounting and its sysfs representation. Use DMA32 accounting where applicable. Add a sysfs interface to make the heuristically determined limits readable and configurable.
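With inclusive zones, a normal kernel allocation is charged to every zone at once, so detecting memory pressure reduces to one comparison per zone. A simplified sketch of that check, using illustrative stand-ins for the ttm_mem_zone machinery in the patch below:

struct example_zone {
	uint64_t used_mem;	/* bytes currently charged to this zone */
	uint64_t max_mem;	/* heuristic limit, now tunable via sysfs */
};

/* Inclusive accounting: exceeding any single zone signals pressure. */
static bool example_zones_over_limit(struct example_zone **zones,
				     unsigned int nr_zones, uint64_t extra)
{
	unsigned int i;

	for (i = 0; i < nr_zones; ++i)
		if (zones[i]->used_mem + extra > zones[i]->max_mem)
			return true;
	return false;
}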
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 6 +- drivers/gpu/drm/ttm/ttm_global.c | 4 +- drivers/gpu/drm/ttm/ttm_memory.c | 488 +++++++++++++++++++++++++++++++++------ drivers/gpu/drm/ttm/ttm_tt.c | 29 +-- include/drm/ttm/ttm_memory.h | 43 ++-- include/drm/ttm/ttm_module.h | 2 + 6 files changed, 453 insertions(+), 119 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c1c407f7cca3..f16909ceec93 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -70,7 +70,7 @@ static void ttm_bo_release_list(struct kref *list_kref) if (bo->destroy) bo->destroy(bo); else { - ttm_mem_global_free(bdev->mem_glob, bo->acc_size, false); + ttm_mem_global_free(bdev->mem_glob, bo->acc_size); kfree(bo); } } @@ -1065,14 +1065,14 @@ int ttm_buffer_object_create(struct ttm_bo_device *bdev, size_t acc_size = ttm_bo_size(bdev, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); - ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false, false); + ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); if (unlikely(ret != 0)) return ret; bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (unlikely(bo == NULL)) { - ttm_mem_global_free(mem_glob, acc_size, false); + ttm_mem_global_free(mem_glob, acc_size); return -ENOMEM; } diff --git a/drivers/gpu/drm/ttm/ttm_global.c b/drivers/gpu/drm/ttm/ttm_global.c index 0b14eb1972b8..541744d00d3e 100644 --- a/drivers/gpu/drm/ttm/ttm_global.c +++ b/drivers/gpu/drm/ttm/ttm_global.c @@ -71,7 +71,7 @@ int ttm_global_item_ref(struct ttm_global_reference *ref) mutex_lock(&item->mutex); if (item->refcount == 0) { - item->object = kmalloc(ref->size, GFP_KERNEL); + item->object = kzalloc(ref->size, GFP_KERNEL); if (unlikely(item->object == NULL)) { ret = -ENOMEM; goto out_err; @@ -89,7 +89,6 @@ int ttm_global_item_ref(struct ttm_global_reference *ref) mutex_unlock(&item->mutex); return 0; out_err: - kfree(item->object); mutex_unlock(&item->mutex); item->object = NULL; return ret; @@ -105,7 +104,6 @@ void ttm_global_item_unref(struct ttm_global_reference *ref) BUG_ON(ref->object != item->object); if (--item->refcount == 0) { ref->release(ref); - kfree(item->object); item->object = NULL; } mutex_unlock(&item->mutex); diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index 87323d4ff68d..62fb5cf0899e 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -26,15 +26,180 @@ **************************************************************************/ #include "ttm/ttm_memory.h" +#include "ttm/ttm_module.h" #include #include #include #include #include -#define TTM_PFX "[TTM] " #define TTM_MEMORY_ALLOC_RETRIES 4 +struct ttm_mem_zone { + struct kobject kobj; + struct ttm_mem_global *glob; + const char *name; + uint64_t zone_mem; + uint64_t emer_mem; + uint64_t max_mem; + uint64_t swap_limit; + uint64_t used_mem; +}; + +static struct attribute ttm_mem_sys = { + .name = "zone_memory", + .mode = S_IRUGO +}; +static struct attribute ttm_mem_emer = { + .name = "emergency_memory", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_max = { + .name = "available_memory", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_swap = { + .name = "swap_limit", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_used = { + .name = "used_memory", + .mode = S_IRUGO +}; + +static void ttm_mem_zone_kobj_release(struct kobject *kobj) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct 
ttm_mem_zone, kobj); + + printk(KERN_INFO TTM_PFX + "Zone %7s: Used memory at exit: %llu kiB.\n", + zone->name, (unsigned long long) zone->used_mem >> 10); + kfree(zone); +} + +static ssize_t ttm_mem_zone_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct ttm_mem_zone, kobj); + uint64_t val = 0; + + spin_lock(&zone->glob->lock); + if (attr == &ttm_mem_sys) + val = zone->zone_mem; + else if (attr == &ttm_mem_emer) + val = zone->emer_mem; + else if (attr == &ttm_mem_max) + val = zone->max_mem; + else if (attr == &ttm_mem_swap) + val = zone->swap_limit; + else if (attr == &ttm_mem_used) + val = zone->used_mem; + spin_unlock(&zone->glob->lock); + + return snprintf(buffer, PAGE_SIZE, "%llu\n", + (unsigned long long) val >> 10); +} + +static void ttm_check_swapping(struct ttm_mem_global *glob); + +static ssize_t ttm_mem_zone_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t size) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct ttm_mem_zone, kobj); + int chars; + unsigned long val; + uint64_t val64; + + chars = sscanf(buffer, "%lu", &val); + if (chars == 0) + return size; + + val64 = val; + val64 <<= 10; + + spin_lock(&zone->glob->lock); + if (val64 > zone->zone_mem) + val64 = zone->zone_mem; + if (attr == &ttm_mem_emer) { + zone->emer_mem = val64; + if (zone->max_mem > val64) + zone->max_mem = val64; + } else if (attr == &ttm_mem_max) { + zone->max_mem = val64; + if (zone->emer_mem < val64) + zone->emer_mem = val64; + } else if (attr == &ttm_mem_swap) + zone->swap_limit = val64; + spin_unlock(&zone->glob->lock); + + ttm_check_swapping(zone->glob); + + return size; +} + +static struct attribute *ttm_mem_zone_attrs[] = { + &ttm_mem_sys, + &ttm_mem_emer, + &ttm_mem_max, + &ttm_mem_swap, + &ttm_mem_used, + NULL +}; + +static struct sysfs_ops ttm_mem_zone_ops = { + .show = &ttm_mem_zone_show, + .store = &ttm_mem_zone_store +}; + +static struct kobj_type ttm_mem_zone_kobj_type = { + .release = &ttm_mem_zone_kobj_release, + .sysfs_ops = &ttm_mem_zone_ops, + .default_attrs = ttm_mem_zone_attrs, +}; + +static void ttm_mem_global_kobj_release(struct kobject *kobj) +{ + struct ttm_mem_global *glob = + container_of(kobj, struct ttm_mem_global, kobj); + + kfree(glob); +} + +static struct kobj_type ttm_mem_glob_kobj_type = { + .release = &ttm_mem_global_kobj_release, +}; + +static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob, + bool from_wq, uint64_t extra) +{ + unsigned int i; + struct ttm_mem_zone *zone; + uint64_t target; + + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + + if (from_wq) + target = zone->swap_limit; + else if (capable(CAP_SYS_ADMIN)) + target = zone->emer_mem; + else + target = zone->max_mem; + + target = (extra > target) ? 0ULL : target; + + if (zone->used_mem > target) + return true; + } + return false; +} + /** * At this point we only support a single shrink callback. * Extend this if needed, perhaps using a linked list of callbacks. @@ -42,34 +207,17 @@ * many threads may try to swap out at any given time. 
*/ -static void ttm_shrink(struct ttm_mem_global *glob, bool from_workqueue, +static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, uint64_t extra) { int ret; struct ttm_mem_shrink *shrink; - uint64_t target; - uint64_t total_target; spin_lock(&glob->lock); if (glob->shrink == NULL) goto out; - if (from_workqueue) { - target = glob->swap_limit; - total_target = glob->total_memory_swap_limit; - } else if (capable(CAP_SYS_ADMIN)) { - total_target = glob->emer_total_memory; - target = glob->emer_memory; - } else { - total_target = glob->max_total_memory; - target = glob->max_memory; - } - - total_target = (extra >= total_target) ? 0 : total_target - extra; - target = (extra >= target) ? 0 : target - extra; - - while (glob->used_memory > target || - glob->used_total_memory > total_target) { + while (ttm_zones_above_swap_target(glob, from_wq, extra)) { shrink = glob->shrink; spin_unlock(&glob->lock); ret = shrink->do_shrink(shrink); @@ -81,6 +229,8 @@ out: spin_unlock(&glob->lock); } + + static void ttm_shrink_work(struct work_struct *work) { struct ttm_mem_global *glob = @@ -89,63 +239,178 @@ static void ttm_shrink_work(struct work_struct *work) ttm_shrink(glob, true, 0ULL); } +static int ttm_mem_init_kernel_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + mem = si->totalram - si->totalhigh; + mem *= si->mem_unit; + + zone->name = "kernel"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_kernel = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} + +#ifdef CONFIG_HIGHMEM +static int ttm_mem_init_highmem_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + if (si->totalhigh == 0) + return 0; + + mem = si->totalram; + mem *= si->mem_unit; + + zone->name = "highmem"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_highmem = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} +#else +static int ttm_mem_init_dma32_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + mem = si->totalram; + mem *= si->mem_unit; + + /** + * No special dma32 zone needed. + */ + + if (mem <= ((uint64_t) 1ULL << 32)) + return 0; + + /* + * Limit max dma32 memory to 4GB for now + * until we can figure out how big this + * zone really is. 
+ */ + + mem = ((uint64_t) 1ULL << 32); + zone->name = "dma32"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_dma32 = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} +#endif + int ttm_mem_global_init(struct ttm_mem_global *glob) { struct sysinfo si; - uint64_t mem; + int ret; + int i; + struct ttm_mem_zone *zone; spin_lock_init(&glob->lock); glob->swap_queue = create_singlethread_workqueue("ttm_swap"); INIT_WORK(&glob->work, ttm_shrink_work); init_waitqueue_head(&glob->queue); + kobject_init(&glob->kobj, &ttm_mem_glob_kobj_type); + ret = kobject_add(&glob->kobj, + ttm_get_kobj(), + "memory_accounting"); + if (unlikely(ret != 0)) + goto out_no_zone; si_meminfo(&si); - mem = si.totalram - si.totalhigh; - mem *= si.mem_unit; - - glob->max_memory = mem >> 1; - glob->emer_memory = (mem >> 1) + (mem >> 2); - glob->swap_limit = glob->max_memory - (mem >> 3); - glob->used_memory = 0; - glob->used_total_memory = 0; - glob->shrink = NULL; - - mem = si.totalram; - mem *= si.mem_unit; - - glob->max_total_memory = mem >> 1; - glob->emer_total_memory = (mem >> 1) + (mem >> 2); - - glob->total_memory_swap_limit = glob->max_total_memory - (mem >> 3); - - printk(KERN_INFO TTM_PFX "TTM available graphics memory: %llu MiB\n", - glob->max_total_memory >> 20); - printk(KERN_INFO TTM_PFX "TTM available object memory: %llu MiB\n", - glob->max_memory >> 20); - + ret = ttm_mem_init_kernel_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#ifdef CONFIG_HIGHMEM + ret = ttm_mem_init_highmem_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#else + ret = ttm_mem_init_dma32_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#endif + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + printk(KERN_INFO TTM_PFX + "Zone %7s: Available graphics memory: %llu kiB.\n", + zone->name, (unsigned long long) zone->max_mem >> 10); + } return 0; +out_no_zone: + ttm_mem_global_release(glob); + return ret; } EXPORT_SYMBOL(ttm_mem_global_init); void ttm_mem_global_release(struct ttm_mem_global *glob) { - printk(KERN_INFO TTM_PFX "Used total memory is %llu bytes.\n", - (unsigned long long)glob->used_total_memory); + unsigned int i; + struct ttm_mem_zone *zone; + flush_workqueue(glob->swap_queue); destroy_workqueue(glob->swap_queue); glob->swap_queue = NULL; + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + kobject_del(&zone->kobj); + kobject_put(&zone->kobj); + } + kobject_del(&glob->kobj); + kobject_put(&glob->kobj); } EXPORT_SYMBOL(ttm_mem_global_release); -static inline void ttm_check_swapping(struct ttm_mem_global *glob) +static void ttm_check_swapping(struct ttm_mem_global *glob) { - bool needs_swapping; + bool needs_swapping = false; + unsigned int i; + struct ttm_mem_zone *zone; spin_lock(&glob->lock); - needs_swapping = (glob->used_memory > glob->swap_limit || - glob->used_total_memory > - glob->total_memory_swap_limit); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (zone->used_mem > zone->swap_limit) { + needs_swapping = true; + break; + } + } + spin_unlock(&glob->lock); if (unlikely(needs_swapping)) @@ -153,44 +418,60 @@ static inline void ttm_check_swapping(struct ttm_mem_global *glob) } -void ttm_mem_global_free(struct ttm_mem_global *glob, - uint64_t amount, 
bool himem) +static void ttm_mem_global_free_zone(struct ttm_mem_global *glob, + struct ttm_mem_zone *single_zone, + uint64_t amount) { + unsigned int i; + struct ttm_mem_zone *zone; + spin_lock(&glob->lock); - glob->used_total_memory -= amount; - if (!himem) - glob->used_memory -= amount; - wake_up_all(&glob->queue); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; + zone->used_mem -= amount; + } spin_unlock(&glob->lock); } +void ttm_mem_global_free(struct ttm_mem_global *glob, + uint64_t amount) +{ + return ttm_mem_global_free_zone(glob, NULL, amount); +} + static int ttm_mem_global_reserve(struct ttm_mem_global *glob, - uint64_t amount, bool himem, bool reserve) + struct ttm_mem_zone *single_zone, + uint64_t amount, bool reserve) { uint64_t limit; - uint64_t lomem_limit; int ret = -ENOMEM; + unsigned int i; + struct ttm_mem_zone *zone; spin_lock(&glob->lock); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; - if (capable(CAP_SYS_ADMIN)) { - limit = glob->emer_total_memory; - lomem_limit = glob->emer_memory; - } else { - limit = glob->max_total_memory; - lomem_limit = glob->max_memory; - } + limit = (capable(CAP_SYS_ADMIN)) ? + zone->emer_mem : zone->max_mem; - if (unlikely(glob->used_total_memory + amount > limit)) - goto out_unlock; - if (unlikely(!himem && glob->used_memory + amount > lomem_limit)) - goto out_unlock; + if (zone->used_mem > limit) + goto out_unlock; + } if (reserve) { - glob->used_total_memory += amount; - if (!himem) - glob->used_memory += amount; + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; + zone->used_mem += amount; + } } + ret = 0; out_unlock: spin_unlock(&glob->lock); @@ -199,12 +480,17 @@ out_unlock: return ret; } -int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, - bool no_wait, bool interruptible, bool himem) + +static int ttm_mem_global_alloc_zone(struct ttm_mem_global *glob, + struct ttm_mem_zone *single_zone, + uint64_t memory, + bool no_wait, bool interruptible) { int count = TTM_MEMORY_ALLOC_RETRIES; - while (unlikely(ttm_mem_global_reserve(glob, memory, himem, true) + while (unlikely(ttm_mem_global_reserve(glob, + single_zone, + memory, true) != 0)) { if (no_wait) return -ENOMEM; @@ -216,6 +502,56 @@ int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, return 0; } +int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, + bool no_wait, bool interruptible) +{ + /** + * Normal allocations of kernel memory are registered in + * all zones. + */ + + return ttm_mem_global_alloc_zone(glob, NULL, memory, no_wait, + interruptible); +} + +int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, + struct page *page, + bool no_wait, bool interruptible) +{ + + struct ttm_mem_zone *zone = NULL; + + /** + * Page allocations may be registered in a single zone + * only if highmem or !dma32.
+ */ + +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page) && glob->zone_highmem != NULL) + zone = glob->zone_highmem; +#else + if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL) + zone = glob->zone_kernel; +#endif + return ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait, + interruptible); +} + +void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page) +{ + struct ttm_mem_zone *zone = NULL; + +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page) && glob->zone_highmem != NULL) + zone = glob->zone_highmem; +#else + if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL) + zone = glob->zone_kernel; +#endif + ttm_mem_global_free_zone(glob, zone, PAGE_SIZE); +} + + size_t ttm_round_pot(size_t size) { if ((size & (size - 1)) == 0) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 75dc8bd24592..4e1e2566d519 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -166,7 +166,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) set_page_dirty_lock(page); ttm->pages[i] = NULL; - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE, false); + ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE); put_page(page); } ttm->state = tt_unpopulated; @@ -187,21 +187,14 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index) if (!p) return NULL; - if (PageHighMem(p)) { - ret = - ttm_mem_global_alloc(mem_glob, PAGE_SIZE, - false, false, true); - if (unlikely(ret != 0)) - goto out_err; + ret = ttm_mem_global_alloc_page(mem_glob, p, false, false); + if (unlikely(ret != 0)) + goto out_err; + + if (PageHighMem(p)) ttm->pages[--ttm->first_himem_page] = p; - } else { - ret = - ttm_mem_global_alloc(mem_glob, PAGE_SIZE, - false, false, false); - if (unlikely(ret != 0)) - goto out_err; + else ttm->pages[++ttm->last_lomem_page] = p; - } } return p; out_err: @@ -355,8 +348,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) printk(KERN_ERR TTM_PFX "Erroneous page count. " "Leaking pages.\n"); - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE, - PageHighMem(cur_page)); + ttm_mem_global_free_page(ttm->bdev->mem_glob, + cur_page); __free_page(cur_page); } } @@ -411,7 +404,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, */ ret = ttm_mem_global_alloc(mem_glob, num_pages * PAGE_SIZE, - false, false, false); + false, false); if (unlikely(ret != 0)) return ret; @@ -422,7 +415,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, if (ret != num_pages && write) { ttm_tt_free_user_pages(ttm); - ttm_mem_global_free(mem_glob, num_pages * PAGE_SIZE, false); + ttm_mem_global_free(mem_glob, num_pages * PAGE_SIZE); return -ENOMEM; } diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h index d8b8f042c4f1..6983a7cf4da4 100644 --- a/include/drm/ttm/ttm_memory.h +++ b/include/drm/ttm/ttm_memory.h @@ -32,6 +32,7 @@ #include #include #include +#include /** * struct ttm_mem_shrink - callback to shrink TTM memory usage. @@ -60,34 +61,33 @@ struct ttm_mem_shrink { * @queue: Wait queue for processes suspended waiting for memory. * @lock: Lock to protect the @shrink - and the memory accounting members, * that is, essentially the whole structure with some exceptions. - * @emer_memory: Lowmem memory limit available for root. - * @max_memory: Lowmem memory limit available for non-root. - * @swap_limit: Lowmem memory limit where the shrink workqueue kicks in. - * @used_memory: Currently used lowmem memory. - * @used_total_memory: Currently used total (lowmem + highmem) memory. 
- * @total_memory_swap_limit: Total memory limit where the shrink workqueue - * kicks in. - * @max_total_memory: Total memory available to non-root processes. - * @emer_total_memory: Total memory available to root processes. + * @zones: Array of pointers to accounting zones. + * @num_zones: Number of populated entries in the @zones array. + * @zone_kernel: Pointer to the kernel zone. + * @zone_highmem: Pointer to the highmem zone if there is one. + * @zone_dma32: Pointer to the dma32 zone if there is one. * * Note that this structure is not per device. It should be global for all * graphics devices. */ +#define TTM_MEM_MAX_ZONES 2 +struct ttm_mem_zone; struct ttm_mem_global { + struct kobject kobj; struct ttm_mem_shrink *shrink; struct workqueue_struct *swap_queue; struct work_struct work; wait_queue_head_t queue; spinlock_t lock; - uint64_t emer_memory; - uint64_t max_memory; - uint64_t swap_limit; - uint64_t used_memory; - uint64_t used_total_memory; - uint64_t total_memory_swap_limit; - uint64_t max_total_memory; - uint64_t emer_total_memory; + struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES]; + unsigned int num_zones; + struct ttm_mem_zone *zone_kernel; +#ifdef CONFIG_HIGHMEM + struct ttm_mem_zone *zone_highmem; +#else + struct ttm_mem_zone *zone_dma32; +#endif }; /** @@ -146,8 +146,13 @@ static inline void ttm_mem_unregister_shrink(struct ttm_mem_global *glob, extern int ttm_mem_global_init(struct ttm_mem_global *glob); extern void ttm_mem_global_release(struct ttm_mem_global *glob); extern int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, - bool no_wait, bool interruptible, bool himem); + bool no_wait, bool interruptible); extern void ttm_mem_global_free(struct ttm_mem_global *glob, - uint64_t amount, bool himem); + uint64_t amount); +extern int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, + struct page *page, + bool no_wait, bool interruptible); +extern void ttm_mem_global_free_page(struct ttm_mem_global *glob, + struct page *page); extern size_t ttm_round_pot(size_t size); #endif diff --git a/include/drm/ttm/ttm_module.h b/include/drm/ttm/ttm_module.h index 889a4c7958ae..0a72ac7c7e58 100644 --- a/include/drm/ttm/ttm_module.h +++ b/include/drm/ttm/ttm_module.h @@ -32,6 +32,7 @@ #define _TTM_MODULE_H_ #include +struct kobject; #define TTM_PFX "[TTM]" @@ -54,5 +55,6 @@ extern void ttm_global_init(void); extern void ttm_global_release(void); extern int ttm_global_item_ref(struct ttm_global_reference *ref); extern void ttm_global_item_unref(struct ttm_global_reference *ref); +extern struct kobject *ttm_get_kobj(void); #endif /* _TTM_MODULE_H_ */ -- cgit v1.2.3 From a987fcaa805fcb24ba885c2e29fd4fdb6816f08f Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 18 Aug 2009 16:51:56 +0200 Subject: ttm: Make parts of a struct ttm_bo_device global. Common resources, like memory accounting and swap lists, should be global and not per device. Introduce a struct ttm_bo_global to accommodate this, and register it with sysfs. Add a small sysfs interface to return the number of active buffer objects.
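For driver writers, the intended wiring is the two-step sequence visible in the radeon hunks below: take a reference on the memory-accounting global, feed the resulting object into a struct ttm_bo_global_ref, and hand the bo global (rather than the mem global) to ttm_bo_device_init(). The following is a minimal sketch of that sequence, not part of the patch; struct mydrv_mman mirrors radeon_mman, and the mydrv_* names, including the mem-global init/release callbacks, are illustrative placeholders.

static int mydrv_ttm_global_init(struct mydrv_mman *mman)
{
	struct ttm_global_reference *global_ref;
	int r;

	/* Step 1: reference the shared memory accounting global. */
	global_ref = &mman->mem_global_ref;
	global_ref->global_type = TTM_GLOBAL_TTM_MEM;
	global_ref->size = sizeof(struct ttm_mem_global);
	global_ref->init = &mydrv_ttm_mem_global_init;
	global_ref->release = &mydrv_ttm_mem_global_release;
	r = ttm_global_item_ref(global_ref);
	if (r != 0)
		return r;

	/*
	 * Step 2: feed that object to the buffer-object global.
	 * (The radeon hunk below passes sizeof(struct ttm_mem_global)
	 * here; the bo global's own size is used in this sketch.)
	 */
	mman->bo_global_ref.mem_glob = mman->mem_global_ref.object;
	global_ref = &mman->bo_global_ref.ref;
	global_ref->global_type = TTM_GLOBAL_TTM_BO;
	global_ref->size = sizeof(struct ttm_bo_global);
	global_ref->init = &ttm_bo_global_init;
	global_ref->release = &ttm_bo_global_release;
	r = ttm_global_item_ref(global_ref);
	if (r != 0) {
		ttm_global_item_unref(&mman->mem_global_ref);
		return r;
	}
	mman->mem_global_referenced = true;

	/* Step 3: the bo device now binds to the bo global. */
	return ttm_bo_device_init(&mman->bdev,
				  mman->bo_global_ref.ref.object,
				  &mydrv_bo_driver, DRM_FILE_PAGE_OFFSET);
}

Teardown runs in the reverse order, unreferencing the bo global before the memory global, as radeon_ttm_global_fini() does below.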
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_object.h | 1 + drivers/gpu/drm/radeon/radeon_ttm.c | 33 +++- drivers/gpu/drm/ttm/ttm_bo.c | 292 ++++++++++++++++++++++----------- drivers/gpu/drm/ttm/ttm_bo_util.c | 4 +- drivers/gpu/drm/ttm/ttm_tt.c | 12 +- include/drm/ttm/ttm_bo_api.h | 1 + include/drm/ttm/ttm_bo_driver.h | 94 ++++++++--- 7 files changed, 296 insertions(+), 141 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 473e4775dc5a..10e8af6bb456 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -37,6 +37,7 @@ * TTM. */ struct radeon_mman { + struct ttm_bo_global_ref bo_global_ref; struct ttm_global_reference mem_global_ref; bool mem_global_referenced; struct ttm_bo_device bdev; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1227a97f5169..343b6d6b99c6 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -77,9 +77,25 @@ static int radeon_ttm_global_init(struct radeon_device *rdev) global_ref->release = &radeon_ttm_mem_global_release; r = ttm_global_item_ref(global_ref); if (r != 0) { - DRM_ERROR("Failed referencing a global TTM memory object.\n"); + DRM_ERROR("Failed setting up TTM memory accounting " + "subsystem.\n"); return r; } + + rdev->mman.bo_global_ref.mem_glob = + rdev->mman.mem_global_ref.object; + global_ref = &rdev->mman.bo_global_ref.ref; + global_ref->global_type = TTM_GLOBAL_TTM_BO; + global_ref->size = sizeof(struct ttm_mem_global); + global_ref->init = &ttm_bo_global_init; + global_ref->release = &ttm_bo_global_release; + r = ttm_global_item_ref(global_ref); + if (r != 0) { + DRM_ERROR("Failed setting up TTM BO subsystem.\n"); + ttm_global_item_unref(&rdev->mman.mem_global_ref); + return r; + } + rdev->mman.mem_global_referenced = true; return 0; } @@ -87,6 +103,7 @@ static int radeon_ttm_global_init(struct radeon_device *rdev) static void radeon_ttm_global_fini(struct radeon_device *rdev) { if (rdev->mman.mem_global_referenced) { + ttm_global_item_unref(&rdev->mman.bo_global_ref.ref); ttm_global_item_unref(&rdev->mman.mem_global_ref); rdev->mman.mem_global_referenced = false; } @@ -286,9 +303,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, r = ttm_bo_move_ttm(bo, true, no_wait, new_mem); out_cleanup: if (tmp_mem.mm_node) { - spin_lock(&rdev->mman.bdev.lru_lock); + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); drm_mm_put_block(tmp_mem.mm_node); - spin_unlock(&rdev->mman.bdev.lru_lock); + spin_unlock(&glob->lru_lock); return r; } return r; @@ -323,9 +342,11 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, } out_cleanup: if (tmp_mem.mm_node) { - spin_lock(&rdev->mman.bdev.lru_lock); + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); drm_mm_put_block(tmp_mem.mm_node); - spin_unlock(&rdev->mman.bdev.lru_lock); + spin_unlock(&glob->lru_lock); return r; } return r; @@ -441,7 +462,7 @@ int radeon_ttm_init(struct radeon_device *rdev) } /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&rdev->mman.bdev, - rdev->mman.mem_global_ref.object, + rdev->mman.bo_global_ref.ref.object, &radeon_bo_driver, DRM_FILE_PAGE_OFFSET); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
f16909ceec93..0d0b1b7afbcf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -45,6 +45,39 @@ static int ttm_bo_setup_vm(struct ttm_buffer_object *bo); static void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); static int ttm_bo_swapout(struct ttm_mem_shrink *shrink); +static void ttm_bo_global_kobj_release(struct kobject *kobj); + +static struct attribute ttm_bo_count = { + .name = "bo_count", + .mode = S_IRUGO +}; + +static ssize_t ttm_bo_global_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct ttm_bo_global *glob = + container_of(kobj, struct ttm_bo_global, kobj); + + return snprintf(buffer, PAGE_SIZE, "%lu\n", + (unsigned long) atomic_read(&glob->bo_count)); +} + +static struct attribute *ttm_bo_global_attrs[] = { + &ttm_bo_count, + NULL +}; + +static struct sysfs_ops ttm_bo_global_ops = { + .show = &ttm_bo_global_show +}; + +static struct kobj_type ttm_bo_glob_kobj_type = { + .release = &ttm_bo_global_kobj_release, + .sysfs_ops = &ttm_bo_global_ops, + .default_attrs = ttm_bo_global_attrs +}; + static inline uint32_t ttm_bo_type_flags(unsigned type) { @@ -67,10 +100,11 @@ static void ttm_bo_release_list(struct kref *list_kref) if (bo->ttm) ttm_tt_destroy(bo->ttm); + atomic_dec(&bo->glob->bo_count); if (bo->destroy) bo->destroy(bo); else { - ttm_mem_global_free(bdev->mem_glob, bo->acc_size); + ttm_mem_global_free(bdev->glob->mem_glob, bo->acc_size); kfree(bo); } } @@ -107,7 +141,7 @@ static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) kref_get(&bo->list_kref); if (bo->ttm != NULL) { - list_add_tail(&bo->swap, &bdev->swap_lru); + list_add_tail(&bo->swap, &bo->glob->swap_lru); kref_get(&bo->list_kref); } } @@ -142,7 +176,7 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int ret; while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) { @@ -154,9 +188,9 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, if (no_wait) return -EBUSY; - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ret = ttm_bo_wait_unreserved(bo, interruptible); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (unlikely(ret)) return ret; @@ -182,16 +216,16 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int put_count = 0; int ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ret = ttm_bo_reserve_locked(bo, interruptible, no_wait, use_sequence, sequence); if (likely(ret == 0)) put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&bo->list_kref, ttm_bo_ref_bug); @@ -201,13 +235,13 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, void ttm_bo_unreserve(struct ttm_buffer_object *bo) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ttm_bo_add_to_lru(bo); atomic_set(&bo->reserved, 0); wake_up_all(&bo->event_queue); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_bo_unreserve); @@ -218,6 +252,7 @@ EXPORT_SYMBOL(ttm_bo_unreserve); static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = 
bo->glob; int ret = 0; uint32_t page_flags = 0; @@ -230,14 +265,14 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; case ttm_bo_type_kernel: bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, - page_flags, bdev->dummy_read_page); + page_flags, glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) ret = -ENOMEM; break; case ttm_bo_type_user: bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, page_flags | TTM_PAGE_FLAG_USER, - bdev->dummy_read_page); + glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) ret = -ENOMEM; break; @@ -355,6 +390,7 @@ out_err: static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_bo_driver *driver = bdev->driver; int ret; @@ -366,7 +402,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) spin_unlock(&bo->lock); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ret = ttm_bo_reserve_locked(bo, false, false, false, 0); BUG_ON(ret); if (bo->ttm) @@ -381,7 +417,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) bo->mem.mm_node = NULL; } put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); atomic_set(&bo->reserved, 0); @@ -391,14 +427,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) return 0; } - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (list_empty(&bo->ddestroy)) { void *sync_obj = bo->sync_obj; void *sync_obj_arg = bo->sync_obj_arg; kref_get(&bo->list_kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); spin_unlock(&bo->lock); if (sync_obj) @@ -408,7 +444,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) ret = 0; } else { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); spin_unlock(&bo->lock); ret = -EBUSY; } @@ -423,11 +459,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_buffer_object *entry, *nentry; struct list_head *list, *next; int ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); list_for_each_safe(list, next, &bdev->ddestroy) { entry = list_entry(list, struct ttm_buffer_object, ddestroy); nentry = NULL; @@ -444,16 +481,16 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) } kref_get(&entry->list_kref); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ret = ttm_bo_cleanup_refs(entry, remove_all); kref_put(&entry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (nentry) { bool next_onlist = !list_empty(next); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); kref_put(&nentry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); /* * Someone might have raced us and removed the * next entry from the list. 
We don't bother restarting @@ -467,7 +504,7 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) break; } ret = !list_empty(&bdev->ddestroy); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return ret; } @@ -517,6 +554,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, unsigned mem_type, { int ret = 0; struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_mem_reg evict_mem; uint32_t proposed_placement; @@ -565,12 +603,12 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, unsigned mem_type, goto out; } - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (evict_mem.mm_node) { drm_mm_put_block(evict_mem.mm_node); evict_mem.mm_node = NULL; } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); bo->evicted = true; out: return ret; @@ -585,6 +623,7 @@ static int ttm_bo_mem_force_space(struct ttm_bo_device *bdev, uint32_t mem_type, bool interruptible, bool no_wait) { + struct ttm_bo_global *glob = bdev->glob; struct drm_mm_node *node; struct ttm_buffer_object *entry; struct ttm_mem_type_manager *man = &bdev->man[mem_type]; @@ -598,7 +637,7 @@ retry_pre_get: if (unlikely(ret != 0)) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); do { node = drm_mm_search_free(&man->manager, num_pages, mem->page_alignment, 1); @@ -619,7 +658,7 @@ retry_pre_get: if (likely(ret == 0)) put_count = ttm_bo_del_from_lru(entry); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); if (unlikely(ret != 0)) return ret; @@ -635,21 +674,21 @@ retry_pre_get: if (ret) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } while (1); if (!node) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return -ENOMEM; } node = drm_mm_get_block_atomic(node, num_pages, mem->page_alignment); if (unlikely(!node)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); goto retry_pre_get; } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); mem->mm_node = node; mem->mem_type = mem_type; return 0; @@ -697,6 +736,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, bool interruptible, bool no_wait) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_mem_type_manager *man; uint32_t num_prios = bdev->driver->num_mem_type_prio; @@ -733,20 +773,20 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (unlikely(ret)) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); node = drm_mm_search_free(&man->manager, mem->num_pages, mem->page_alignment, 1); if (unlikely(!node)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); break; } node = drm_mm_get_block_atomic(node, mem->num_pages, mem-> page_alignment); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } while (!node); } if (node) @@ -816,7 +856,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, uint32_t proposed_placement, bool interruptible, bool no_wait) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int ret = 0; struct ttm_mem_reg mem; @@ -852,9 +892,9 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, out_unlock: if (ret && mem.mm_node) { - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); drm_mm_put_block(mem.mm_node); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } return ret; } @@ -990,6 +1030,7 @@ int ttm_buffer_object_init(struct ttm_bo_device *bdev, INIT_LIST_HEAD(&bo->ddestroy); INIT_LIST_HEAD(&bo->swap); bo->bdev = 
bdev; + bo->glob = bdev->glob; bo->type = type; bo->num_pages = num_pages; bo->mem.mem_type = TTM_PL_SYSTEM; @@ -1002,6 +1043,7 @@ int ttm_buffer_object_init(struct ttm_bo_device *bdev, bo->seq_valid = false; bo->persistant_swap_storage = persistant_swap_storage; bo->acc_size = acc_size; + atomic_inc(&bo->glob->bo_count); ret = ttm_bo_check_placement(bo, flags, 0ULL); if (unlikely(ret != 0)) @@ -1040,13 +1082,13 @@ out_err: } EXPORT_SYMBOL(ttm_buffer_object_init); -static inline size_t ttm_bo_size(struct ttm_bo_device *bdev, +static inline size_t ttm_bo_size(struct ttm_bo_global *glob, unsigned long num_pages) { size_t page_array_size = (num_pages * sizeof(void *) + PAGE_SIZE - 1) & PAGE_MASK; - return bdev->ttm_bo_size + 2 * page_array_size; + return glob->ttm_bo_size + 2 * page_array_size; } int ttm_buffer_object_create(struct ttm_bo_device *bdev, @@ -1061,10 +1103,10 @@ int ttm_buffer_object_create(struct ttm_bo_device *bdev, { struct ttm_buffer_object *bo; int ret; - struct ttm_mem_global *mem_glob = bdev->mem_glob; + struct ttm_mem_global *mem_glob = bdev->glob->mem_glob; size_t acc_size = - ttm_bo_size(bdev, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); + ttm_bo_size(bdev->glob, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); if (unlikely(ret != 0)) return ret; @@ -1118,6 +1160,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, struct list_head *head, unsigned mem_type, bool allow_errors) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_buffer_object *entry; int ret; int put_count; @@ -1126,30 +1169,31 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, * Can't use standard list traversal since we're unlocking. */ - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); while (!list_empty(head)) { entry = list_first_entry(head, struct ttm_buffer_object, lru); kref_get(&entry->list_kref); ret = ttm_bo_reserve_locked(entry, false, false, false, 0); put_count = ttm_bo_del_from_lru(entry); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&entry->list_kref, ttm_bo_ref_bug); BUG_ON(ret); ret = ttm_bo_leave_list(entry, mem_type, allow_errors); ttm_bo_unreserve(entry); kref_put(&entry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return 0; } int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_mem_type_manager *man = &bdev->man[mem_type]; int ret = -EINVAL; @@ -1171,13 +1215,13 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) if (mem_type > 0) { ttm_bo_force_list_clean(bdev, &man->lru, mem_type, false); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (drm_mm_clean(&man->manager)) drm_mm_takedown(&man->manager); else ret = -EBUSY; - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } return ret; @@ -1251,11 +1295,83 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, } EXPORT_SYMBOL(ttm_bo_init_mm); +static void ttm_bo_global_kobj_release(struct kobject *kobj) +{ + struct ttm_bo_global *glob = + container_of(kobj, struct ttm_bo_global, kobj); + + printk(KERN_INFO TTM_PFX "Freeing bo global.\n"); + ttm_mem_unregister_shrink(glob->mem_glob, &glob->shrink); + __free_page(glob->dummy_read_page); + kfree(glob); +} + +void ttm_bo_global_release(struct ttm_global_reference *ref) +{ + struct ttm_bo_global *glob = ref->object; + + 
kobject_del(&glob->kobj); + kobject_put(&glob->kobj); +} +EXPORT_SYMBOL(ttm_bo_global_release); + +int ttm_bo_global_init(struct ttm_global_reference *ref) +{ + struct ttm_bo_global_ref *bo_ref = + container_of(ref, struct ttm_bo_global_ref, ref); + struct ttm_bo_global *glob = ref->object; + int ret; + + mutex_init(&glob->device_list_mutex); + spin_lock_init(&glob->lru_lock); + glob->mem_glob = bo_ref->mem_glob; + glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); + + if (unlikely(glob->dummy_read_page == NULL)) { + ret = -ENOMEM; + goto out_no_drp; + } + + INIT_LIST_HEAD(&glob->swap_lru); + INIT_LIST_HEAD(&glob->device_list); + + ttm_mem_init_shrink(&glob->shrink, ttm_bo_swapout); + ret = ttm_mem_register_shrink(glob->mem_glob, &glob->shrink); + if (unlikely(ret != 0)) { + printk(KERN_ERR TTM_PFX + "Could not register buffer object swapout.\n"); + goto out_no_shrink; + } + + glob->ttm_bo_extra_size = + ttm_round_pot(sizeof(struct ttm_tt)) + + ttm_round_pot(sizeof(struct ttm_backend)); + + glob->ttm_bo_size = glob->ttm_bo_extra_size + + ttm_round_pot(sizeof(struct ttm_buffer_object)); + + atomic_set(&glob->bo_count, 0); + + kobject_init(&glob->kobj, &ttm_bo_glob_kobj_type); + ret = kobject_add(&glob->kobj, ttm_get_kobj(), "buffer_objects"); + if (unlikely(ret != 0)) + kobject_put(&glob->kobj); + return ret; +out_no_shrink: + __free_page(glob->dummy_read_page); +out_no_drp: + kfree(glob); + return ret; +} +EXPORT_SYMBOL(ttm_bo_global_init); + + int ttm_bo_device_release(struct ttm_bo_device *bdev) { int ret = 0; unsigned i = TTM_NUM_MEM_TYPES; struct ttm_mem_type_manager *man; + struct ttm_bo_global *glob = bdev->glob; while (i--) { man = &bdev->man[i]; @@ -1271,98 +1387,74 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev) } } + mutex_lock(&glob->device_list_mutex); + list_del(&bdev->device_list); + mutex_unlock(&glob->device_list_mutex); + if (!cancel_delayed_work(&bdev->wq)) flush_scheduled_work(); while (ttm_bo_delayed_delete(bdev, true)) ; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (list_empty(&bdev->ddestroy)) TTM_DEBUG("Delayed destroy list was clean\n"); if (list_empty(&bdev->man[0].lru)) TTM_DEBUG("Swap list was clean\n"); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); - ttm_mem_unregister_shrink(bdev->mem_glob, &bdev->shrink); BUG_ON(!drm_mm_clean(&bdev->addr_space_mm)); write_lock(&bdev->vm_lock); drm_mm_takedown(&bdev->addr_space_mm); write_unlock(&bdev->vm_lock); - __free_page(bdev->dummy_read_page); return ret; } EXPORT_SYMBOL(ttm_bo_device_release); -/* - * This function is intended to be called on drm driver load. - * If you decide to call it from firstopen, you must protect the call - * from a potentially racing ttm_bo_driver_finish in lastclose. - * (This may happen on X server restart). - */ - int ttm_bo_device_init(struct ttm_bo_device *bdev, - struct ttm_mem_global *mem_glob, - struct ttm_bo_driver *driver, uint64_t file_page_offset) + struct ttm_bo_global *glob, + struct ttm_bo_driver *driver, + uint64_t file_page_offset) { int ret = -EINVAL; - bdev->dummy_read_page = NULL; rwlock_init(&bdev->vm_lock); - spin_lock_init(&bdev->lru_lock); + spin_lock_init(&glob->lru_lock); bdev->driver = driver; - bdev->mem_glob = mem_glob; memset(bdev->man, 0, sizeof(bdev->man)); - bdev->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); - if (unlikely(bdev->dummy_read_page == NULL)) { - ret = -ENOMEM; - goto out_err0; - } - /* * Initialize the system memory buffer type. * Other types need to be driver / IOCTL initialized. 
*/ ret = ttm_bo_init_mm(bdev, TTM_PL_SYSTEM, 0, 0); if (unlikely(ret != 0)) - goto out_err1; + goto out_no_sys; bdev->addr_space_rb = RB_ROOT; ret = drm_mm_init(&bdev->addr_space_mm, file_page_offset, 0x10000000); if (unlikely(ret != 0)) - goto out_err2; + goto out_no_addr_mm; INIT_DELAYED_WORK(&bdev->wq, ttm_bo_delayed_workqueue); bdev->nice_mode = true; INIT_LIST_HEAD(&bdev->ddestroy); - INIT_LIST_HEAD(&bdev->swap_lru); bdev->dev_mapping = NULL; - ttm_mem_init_shrink(&bdev->shrink, ttm_bo_swapout); - ret = ttm_mem_register_shrink(mem_glob, &bdev->shrink); - if (unlikely(ret != 0)) { - printk(KERN_ERR TTM_PFX - "Could not register buffer object swapout.\n"); - goto out_err2; - } + bdev->glob = glob; - bdev->ttm_bo_extra_size = - ttm_round_pot(sizeof(struct ttm_tt)) + - ttm_round_pot(sizeof(struct ttm_backend)); - - bdev->ttm_bo_size = bdev->ttm_bo_extra_size + - ttm_round_pot(sizeof(struct ttm_buffer_object)); + mutex_lock(&glob->device_list_mutex); + list_add_tail(&bdev->device_list, &glob->device_list); + mutex_unlock(&glob->device_list_mutex); return 0; -out_err2: +out_no_addr_mm: ttm_bo_clean_mm(bdev, 0); -out_err1: - __free_page(bdev->dummy_read_page); -out_err0: +out_no_sys: return ret; } EXPORT_SYMBOL(ttm_bo_device_init); @@ -1607,21 +1699,21 @@ void ttm_bo_synccpu_write_release(struct ttm_buffer_object *bo) static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) { - struct ttm_bo_device *bdev = - container_of(shrink, struct ttm_bo_device, shrink); + struct ttm_bo_global *glob = + container_of(shrink, struct ttm_bo_global, shrink); struct ttm_buffer_object *bo; int ret = -EBUSY; int put_count; uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); while (ret == -EBUSY) { - if (unlikely(list_empty(&bdev->swap_lru))) { - spin_unlock(&bdev->lru_lock); + if (unlikely(list_empty(&glob->swap_lru))) { + spin_unlock(&glob->lru_lock); return -EBUSY; } - bo = list_first_entry(&bdev->swap_lru, + bo = list_first_entry(&glob->swap_lru, struct ttm_buffer_object, swap); kref_get(&bo->list_kref); @@ -1633,16 +1725,16 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) ret = ttm_bo_reserve_locked(bo, false, true, false, 0); if (unlikely(ret == -EBUSY)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ttm_bo_wait_unreserved(bo, false); kref_put(&bo->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } } BUG_ON(ret != 0); put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&bo->list_kref, ttm_bo_ref_bug); @@ -1696,6 +1788,6 @@ out: void ttm_bo_swapout_all(struct ttm_bo_device *bdev) { - while (ttm_bo_swapout(&bdev->shrink) == 0) + while (ttm_bo_swapout(&bdev->glob->shrink) == 0) ; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index bdec583901eb..12cd47aa18ce 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -41,9 +41,9 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo) struct ttm_mem_reg *old_mem = &bo->mem; if (old_mem->mm_node) { - spin_lock(&bo->bdev->lru_lock); + spin_lock(&bo->glob->lru_lock); drm_mm_put_block(old_mem->mm_node); - spin_unlock(&bo->bdev->lru_lock); + spin_unlock(&bo->glob->lru_lock); } old_mem->mm_node = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 4e1e2566d519..b0f73096d372 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ 
b/drivers/gpu/drm/ttm/ttm_tt.c @@ -166,7 +166,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) set_page_dirty_lock(page); ttm->pages[i] = NULL; - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE); + ttm_mem_global_free(ttm->glob->mem_glob, PAGE_SIZE); put_page(page); } ttm->state = tt_unpopulated; @@ -177,8 +177,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index) { struct page *p; - struct ttm_bo_device *bdev = ttm->bdev; - struct ttm_mem_global *mem_glob = bdev->mem_glob; + struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; int ret; while (NULL == (p = ttm->pages[index])) { @@ -348,7 +347,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) printk(KERN_ERR TTM_PFX "Erroneous page count. " "Leaking pages.\n"); - ttm_mem_global_free_page(ttm->bdev->mem_glob, + ttm_mem_global_free_page(ttm->glob->mem_glob, cur_page); __free_page(cur_page); } @@ -394,7 +393,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, struct mm_struct *mm = tsk->mm; int ret; int write = (ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0; - struct ttm_mem_global *mem_glob = ttm->bdev->mem_glob; + struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; BUG_ON(num_pages != ttm->num_pages); BUG_ON((ttm->page_flags & TTM_PAGE_FLAG_USER) == 0); @@ -439,8 +438,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size, if (!ttm) return NULL; - ttm->bdev = bdev; - + ttm->glob = bdev->glob; ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ttm->first_himem_page = ttm->num_pages; ttm->last_lomem_page = -1; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 99dc521aa1a9..491146170522 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -155,6 +155,7 @@ struct ttm_buffer_object { * Members constant at init. */ + struct ttm_bo_global *glob; struct ttm_bo_device *bdev; unsigned long buffer_start; enum ttm_bo_type type; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 62ed733c52a2..9dc32f70b9a2 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -32,6 +32,7 @@ #include "ttm/ttm_bo_api.h" #include "ttm/ttm_memory.h" +#include "ttm/ttm_module.h" #include "drm_mm.h" #include "linux/workqueue.h" #include "linux/fs.h" @@ -160,7 +161,7 @@ struct ttm_tt { long last_lomem_page; uint32_t page_flags; unsigned long num_pages; - struct ttm_bo_device *bdev; + struct ttm_bo_global *glob; struct ttm_backend *be; struct task_struct *tsk; unsigned long start; @@ -355,24 +356,73 @@ struct ttm_bo_driver { void *(*sync_obj_ref) (void *sync_obj); }; -#define TTM_NUM_MEM_TYPES 8 +/** + * struct ttm_bo_global_ref - Argument to initialize a struct ttm_bo_global. + */ + +struct ttm_bo_global_ref { + struct ttm_global_reference ref; + struct ttm_mem_global *mem_glob; +}; -#define TTM_BO_PRIV_FLAG_MOVING 0 /* Buffer object is moving and needs - idling before CPU mapping */ -#define TTM_BO_PRIV_FLAG_MAX 1 /** - * struct ttm_bo_device - Buffer object driver device-specific data. + * struct ttm_bo_global - Buffer object driver global data. * * @mem_glob: Pointer to a struct ttm_mem_global object for accounting. - * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. - * @count: Current number of buffer object. - * @pages: Current number of pinned pages. * @dummy_read_page: Pointer to a dummy page used for mapping requests * of unpopulated pages. 
- * @shrink: A shrink callback object used for buffre object swap. + * @shrink: A shrink callback object used for buffer object swap. * @ttm_bo_extra_size: Extra size (sizeof(struct ttm_buffer_object) excluded) * used by a buffer object. This is excluding page arrays and backing pages. * @ttm_bo_size: This is @ttm_bo_extra_size + sizeof(struct ttm_buffer_object). + * @device_list_mutex: Mutex protecting the device list. + * This mutex is held while traversing the device list for pm options. + * @lru_lock: Spinlock protecting the bo subsystem lru lists. + * @device_list: List of buffer object devices. + * @swap_lru: Lru list of buffer objects used for swapping. + */ + +struct ttm_bo_global { + + /** + * Constant after init. + */ + + struct kobject kobj; + struct ttm_mem_global *mem_glob; + struct page *dummy_read_page; + struct ttm_mem_shrink shrink; + size_t ttm_bo_extra_size; + size_t ttm_bo_size; + struct mutex device_list_mutex; + spinlock_t lru_lock; + + /** + * Protected by device_list_mutex. + */ + struct list_head device_list; + + /** + * Protected by the lru_lock. + */ + struct list_head swap_lru; + + /** + * Internal protection. + */ + atomic_t bo_count; +}; + + +#define TTM_NUM_MEM_TYPES 8 + +#define TTM_BO_PRIV_FLAG_MOVING 0 /* Buffer object is moving and needs + idling before CPU mapping */ +#define TTM_BO_PRIV_FLAG_MAX 1 +/** + * struct ttm_bo_device - Buffer object driver device-specific data. + * + * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. * @addr_space_mm: Range manager for the device address space. * lru_lock: Spinlock that protects the buffer+device lru lists and @@ -390,32 +440,21 @@ struct ttm_bo_device { /* * Constant after bo device init / atomic. */ - - struct ttm_mem_global *mem_glob; + struct list_head device_list; + struct ttm_bo_global *glob; struct ttm_bo_driver *driver; - struct page *dummy_read_page; - struct ttm_mem_shrink shrink; - - size_t ttm_bo_extra_size; - size_t ttm_bo_size; - rwlock_t vm_lock; + struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; /* * Protected by the vm lock. */ - struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; struct rb_root addr_space_rb; struct drm_mm addr_space_mm; /* - * Might want to change this to one lock per manager. - */ - spinlock_t lru_lock; - /* - * Protected by the lru lock. + * Protected by the global:lru lock. */ struct list_head ddestroy; - struct list_head swap_lru; /* * Protected by load / firstopen / lastclose /unload sync. @@ -629,6 +668,9 @@ extern int ttm_bo_pci_offset(struct ttm_bo_device *bdev, unsigned long *bus_offset, unsigned long *bus_size); +extern void ttm_bo_global_release(struct ttm_global_reference *ref); +extern int ttm_bo_global_init(struct ttm_global_reference *ref); + extern int ttm_bo_device_release(struct ttm_bo_device *bdev); /** @@ -646,7 +688,7 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev); * !0: Failure. */ extern int ttm_bo_device_init(struct ttm_bo_device *bdev, - struct ttm_mem_global *mem_glob, + struct ttm_bo_global *glob, struct ttm_bo_driver *driver, uint64_t file_page_offset); -- cgit v1.2.3 From 4516fc0454e7ffe2f369e80045b23c2b32155004 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:54 -0400 Subject: sunrpc: add routine for comparing addresses lockd needs these sorts of routines, as does the NFSv4 callback code. Move lockd's routines into common code and rename them so that they can be used by others. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- fs/lockd/clntlock.c | 2 +- fs/lockd/host.c | 4 ++-- fs/lockd/mon.c | 2 +- fs/lockd/svcsubs.c | 2 +- include/linux/lockd/lockd.h | 43 ---------------------------------------- include/linux/sunrpc/clnt.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 53 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 1f3b0fc0d351..fc9032dc8862 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) + if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7cb076ac6b45..4600c2037b8b 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) */ chain = &nlm_hosts[nlm_hash_address(ni->sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) + if (!rpc_cmp_addr(nlm_addr(host), ni->sap)) continue; /* See if we have an NSM handle for this client */ @@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) if (host->h_server != ni->server) continue; if (ni->server && - !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) + !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) continue; /* Move to head of hash chain. */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 30c933188dd7..f956651d0f65 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) struct nsm_handle *nsm; list_for_each_entry(nsm, &nsm_handles, sm_link) - if (nlm_cmp_addr(nsm_addr(nsm), sap)) + if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 9e4d6aab611b..ad478da7ca63 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - return nlm_cmp_addr(nlm_srcaddr(host), datap); + return rpc_cmp_addr(nlm_srcaddr(host), datap); } /** diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c325b187966b..e7a251a988c0 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) } } -static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); -} -#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return 0; -} -#endif /* !(CONFIG_IPV6 || 
CONFIG_IPV6_MODULE) */ - -/* - * Compare two host addresses - * - * Return TRUE if the addresses are the same; otherwise FALSE. - */ -static inline int nlm_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __nlm_cmp_addr4(sap1, sap2); - case AF_INET6: - return __nlm_cmp_addr6(sap1, sap2); - } - } - return 0; -} - /* * Compare two NLM locks. * When the second lock is of type F_UNLCK, this acts like a wildcard. diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab3f6e90caa5..b17df361be82 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -22,6 +22,7 @@ #include #include #include +#include struct rpc_inode; @@ -188,5 +189,52 @@ static inline void rpc_set_port(struct sockaddr *sap, #define IPV6_SCOPE_DELIMITER '%' #define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); +} +#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} +#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, scope, etc. + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From be3ad6b0b675fd1d6b48362ca30bdee75fbef6b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:55 -0400 Subject: sunrpc: add common routine for copying address portion of a sockaddr Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/clnt.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b17df361be82..044f531aee70 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -198,6 +198,17 @@ static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) @@ -206,12 +217,29 @@ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr); + return true; +} #else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { return false; } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} #endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ /** @@ -236,5 +264,27 @@ static inline bool rpc_cmp_addr(const struct sockaddr *sap1, return false; } +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From 363168b4ea8ec26aeb982ac6024a09f907ecd27e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:56 -0400 Subject: nfsd: make nfs4_client->cl_addr a struct sockaddr_storage It's currently a __be32, which isn't big enough to hold an IPv6 address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 32 +++++++++++++++++++------------- include/linux/nfsd/state.h | 2 +- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9295c4b56bce..bfc14d879ea1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -55,6 +55,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -1220,13 +1221,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, int status; unsigned int strhashval; char dname[HEXDIR_LEN]; + char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); + rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - " ip_addr=%u flags %x, spa_how %d\n", + "ip_addr=%s flags %x, spa_how %d\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, - ip_addr, exid->flags, exid->spa_how); + addr_str, exid->flags, exid->spa_how); if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) return nfserr_inval; @@ -1315,7 +1318,7 @@ out_new: copy_verf(new, &verf); copy_cred(&new->cl_cred, &rqstp->rq_cred); - new->cl_addr = ip_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); gen_clid(new); gen_confirm(new); add_to_unconfirmed(new, strhashval); @@ -1389,7 +1392,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; @@ -1417,7 +1420,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || - (ip_addr != unconf->cl_addr)) { + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; goto out; } @@ -1564,7 +1567,7 @@ __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct xdr_netobj clname = { .len = setclid->se_namelen, .data = setclid->se_name, @@ -1596,8 +1599,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - dprintk("NFSD: setclientid: string in use by client" - " at %pI4\n", &conf->cl_addr); + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, + sizeof(addr_str)); + dprintk("NFSD: setclientid: string in use by client " + "at %s\n", addr_str); goto out; } } @@ -1659,7 +1665,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, gen_clid(new); } copy_verf(new, &clverifier); - new->cl_addr = sin->sin_addr.s_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); new->cl_flavor = rqstp->rq_flavor; princ = svc_gss_principal(rqstp); if (princ) { @@ -1693,7 +1699,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = 
&setclientid_confirm->sc_clientid; @@ -1712,9 +1718,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, unconf = find_unconfirmed_client(clid); status = nfserr_clid_inuse; - if (conf && conf->cl_addr != sin->sin_addr.s_addr) + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) goto out; - if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) goto out; /* diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 58bb19784e12..3510ddd4be49 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -200,7 +200,7 @@ struct nfs4_client { char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ - __be32 cl_addr; /* client ipaddress */ + struct sockaddr_storage cl_addr; /* client ipaddress */ u32 cl_flavor; /* setclientid pseudoflavor */ char *cl_principal; /* setclientid principal name */ struct svc_cred cl_cred; /* setclientid principal */ -- cgit v1.2.3 From aa9a4ec7707a5391cde556f3fa1b0eb4bca3bcf6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:57 -0400 Subject: nfsd: convert nfs4_cb_conn struct to hold address in sockaddr_storage ...rather than as a separate address and port fields. This will be necessary for implementing callbacks over IPv6. Also, convert gen_callback to use the standard rpcuaddr2sockaddr routine rather than its own private one. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 11 ++----- fs/nfsd/nfs4state.c | 81 ++++++---------------------------------------- include/linux/nfsd/state.h | 4 +-- 3 files changed, 13 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3fd23f7aceca..81d1c5285dcc 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -377,7 +377,6 @@ static int max_cb_time(void) int setup_callback_client(struct nfs4_client *clp) { - struct sockaddr_in addr; struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { .to_initval = max_cb_time(), @@ -385,8 +384,8 @@ int setup_callback_client(struct nfs4_client *clp) }; struct rpc_create_args args = { .protocol = IPPROTO_TCP, - .address = (struct sockaddr *)&addr, - .addrsize = sizeof(addr), + .address = (struct sockaddr *) &cb->cb_addr, + .addrsize = cb->cb_addrlen, .timeout = &timeparms, .program = &cb_program, .prognumber = cb->cb_prog, @@ -400,12 +399,6 @@ int setup_callback_client(struct nfs4_client *clp) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - /* Initialize address */ - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(cb->cb_port); - addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bfc14d879ea1..96a742308cee 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -897,76 +897,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, return NULL; } -/* a helper function for parse_callback */ -static int -parse_octet(unsigned int *lenp, char **addrp) -{ - unsigned int len = *lenp; - char *p = *addrp; - int n = -1; - char c; - - for (;;) { - if (!len) - break; - len--; - c = *p++; - if (c == '.') - break; - if ((c < '0') || (c > '9')) { - n = -1; - break; - } - if (n < 0) - n = 0; - n = (n 
* 10) + (c - '0'); - if (n > 255) { - n = -1; - break; - } - } - *lenp = len; - *addrp = p; - return n; -} - -/* parse and set the setclientid ipv4 callback address */ -static int -parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) -{ - int temp = 0; - u32 cbaddr = 0; - u16 cbport = 0; - u32 addrlen = addr_len; - char *addr = addr_val; - int i, shift; - - /* ipaddress */ - shift = 24; - for(i = 4; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbaddr |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbaddrp = cbaddr; - - /* port */ - shift = 8; - for(i = 2; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbport |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbportp = cbport; - return 1; -} - static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { @@ -976,14 +906,21 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) goto out_err; - if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, - &cb->cb_addr, &cb->cb_port))) + cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + se->se_callback_addr_len, + (struct sockaddr *) &cb->cb_addr, + sizeof(cb->cb_addr)); + + if (!cb->cb_addrlen || cb->cb_addr.ss_family != AF_INET) goto out_err; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; return; out_err: + cb->cb_addr.ss_family = AF_UNSPEC; + cb->cb_addrlen = 0; dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 3510ddd4be49..fb0c404c7c5c 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -81,8 +81,8 @@ struct nfs4_delegation { /* client delegation callback info */ struct nfs4_cb_conn { /* SETCLIENTID info */ - u32 cb_addr; - unsigned short cb_port; + struct sockaddr_storage cb_addr; + size_t cb_addrlen; u32 cb_prog; u32 cb_minorversion; u32 cb_ident; /* minorversion 0 only */ -- cgit v1.2.3 From fbf4665f41b02e757ab9d9198df65e319388e728 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:59 -0400 Subject: nfsd: populate sin6_scope_id on callback address with scopeid from rq_addr on SETCLIENTID call When a SETCLIENTID call comes in, one of the args given is the svc_rqst. This struct contains an rq_addr field which holds the address that sent the call. If this is an IPv6 address, then we can use the sin6_scope_id field in this address to populate the sin6_scope_id field in the callback address. AFAICT, the rq_addr.sin6_scope_id is non-zero if and only if the client mounted the server's link-local address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- include/linux/sunrpc/clnt.h | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ec0ca1ef4ea..d2a052480908 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -898,7 +898,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, } static void -gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) { struct nfs4_cb_conn *cb = &clp->cl_cb_conn; unsigned short expected_family; @@ -921,6 +921,9 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) goto out_err; + if (cb->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; @@ -1621,7 +1624,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } copy_cred(&new->cl_cred, &rqstp->rq_cred); gen_confirm(new); - gen_callback(new, setclid); + gen_callback(new, setclid, rpc_get_scope_id(sa)); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 044f531aee70..3d025588e56e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -286,5 +286,20 @@ static inline bool rpc_copy_addr(struct sockaddr *dst, return false; } +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From da15cfdae03351c689736f8d142618592e3cebc3 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 19 Aug 2009 19:13:34 -0700 Subject: time: Introduce CLOCK_REALTIME_COARSE After talking with some application writers who want very fast, but not fine-grained timestamps, I decided to try to implement new clock_ids to clock_gettime(): CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE which returns the time at the last tick. This is very fast as we don't have to access any hardware (which can be very painful if you're using something like the acpi_pm clocksource), and we can even use the vdso clock_gettime() method to avoid the syscall. The only trade off is you only get low-res tick grained time resolution. This isn't a new idea, I know Ingo has a patch in the -rt tree that made the vsyscall gettimeofday() return coarse grained time when the vsyscall64 sysctrl was set to 2. However this affects all applications on a system. With this method, applications can choose the proper speed/granularity trade-off for themselves. 
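For illustration only (not part of the patch): a minimal userspace sketch of the intended usage. It defines the constants by hand since libc headers of the day may not carry them yet (the values 5 and 6 come from the time.h hunk below), and on older glibc clock_gettime() needs -lrt at link time.

#include <stdio.h>
#include <time.h>

#ifndef CLOCK_REALTIME_COARSE
#define CLOCK_REALTIME_COARSE 5		/* value from this patch */
#endif
#ifndef CLOCK_MONOTONIC_COARSE
#define CLOCK_MONOTONIC_COARSE 6	/* value from this patch */
#endif

int main(void)
{
	struct timespec fine, coarse, res;

	clock_gettime(CLOCK_REALTIME, &fine);		/* reads the clocksource */
	clock_gettime(CLOCK_REALTIME_COARSE, &coarse);	/* time at the last tick */
	clock_getres(CLOCK_REALTIME_COARSE, &res);	/* granularity: one tick */

	printf("fine:   %ld.%09ld\n", (long)fine.tv_sec, fine.tv_nsec);
	printf("coarse: %ld.%09ld (res %ld ns)\n",
	       (long)coarse.tv_sec, coarse.tv_nsec, res.tv_nsec);
	return 0;
}

The coarse reads never touch clock hardware, which is the whole point of the new clock IDs.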
Signed-off-by: John Stultz Cc: Andi Kleen Cc: nikolag@ca.ibm.com Cc: Darren Hart Cc: arjan@infradead.org Cc: jonathan@jonmasters.org LKML-Reference: <1250734414.6897.5.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/vgtod.h | 1 + arch/x86/kernel/vsyscall_64.c | 1 + arch/x86/vdso/vclock_gettime.c | 39 ++++++++++++++++++++++++++++++++++++--- include/linux/time.h | 4 ++++ kernel/posix-timers.c | 35 +++++++++++++++++++++++++++++++++++ kernel/time/timekeeping.c | 21 +++++++++++++++++++++ 6 files changed, 98 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69e5d2a..3d61e204826f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -21,6 +21,7 @@ struct vsyscall_gtod_data { u32 shift; } clock; struct timespec wall_to_monotonic; + struct timespec wall_time_coarse; }; extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a80aad..cf53a78e2dcf 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78b46aa..ee55754cc3c5 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts) return 0; } +notrace static noinline int do_realtime_coarse(struct timespec *ts) +{ + unsigned long seq; + do { + seq = read_seqbegin(>od->lock); + ts->tv_sec = gtod->wall_time_coarse.tv_sec; + ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; + } while (unlikely(read_seqretry(>od->lock, seq))); + return 0; +} + +notrace static noinline int do_monotonic_coarse(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(>od->lock); + secs = gtod->wall_time_coarse.tv_sec; + ns = gtod->wall_time_coarse.tv_nsec; + secs += gtod->wall_to_monotonic.tv_sec; + ns += gtod->wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(>od->lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) + if (likely(gtod->sysctl_enabled)) switch (clock) { case CLOCK_REALTIME: - return do_realtime(ts); + if (likely(gtod->clock.vread)) + return do_realtime(ts); + break; case CLOCK_MONOTONIC: - return do_monotonic(ts); + if (likely(gtod->clock.vread)) + return do_monotonic(ts); + break; + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(ts); } return vdso_fallback_gettime(clock, ts); } diff --git a/include/linux/time.h b/include/linux/time.h index f505988398e6..256232f7e5e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -110,6 +110,8 @@ extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); +struct timespec __current_kernel_time(void); /* does not hold xtime_lock */ +struct timespec 
get_monotonic_coarse(void); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) @@ -243,6 +245,8 @@ struct itimerval { #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 #define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 /* * The IDs of various hardware clocks: diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index d089d052c4a9..495440779ce3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -242,6 +242,25 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) return 0; } + +static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp) +{ + *tp = current_kernel_time(); + return 0; +} + +static int posix_get_monotonic_coarse(clockid_t which_clock, + struct timespec *tp) +{ + *tp = get_monotonic_coarse(); + return 0; +} + +int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) +{ + *tp = ktime_to_timespec(KTIME_LOW_RES); + return 0; +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ @@ -262,10 +281,26 @@ static __init int init_posix_timers(void) .timer_create = no_timer_create, .nsleep = no_nsleep, }; + struct k_clock clock_realtime_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_realtime_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; + struct k_clock clock_monotonic_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_monotonic_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); + register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); + register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 15e06defca55..03cbeb34d141 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -847,6 +847,10 @@ unsigned long get_seconds(void) } EXPORT_SYMBOL(get_seconds); +struct timespec __current_kernel_time(void) +{ + return xtime_cache; +} struct timespec current_kernel_time(void) { @@ -862,3 +866,20 @@ struct timespec current_kernel_time(void) return now; } EXPORT_SYMBOL(current_kernel_time); + +struct timespec get_monotonic_coarse(void) +{ + struct timespec now, mono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + + now = xtime_cache; + mono = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, + now.tv_nsec + mono.tv_nsec); + return now; +} -- cgit v1.2.3 From 05ecd5a1f76c183cca381705b3adb7d77c9a0439 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Mon, 24 Aug 2009 19:52:38 +0900 Subject: sh: Simplify "multi-evt" interrupt handling. This patch changes the way in which "multi-evt" interrupts are handled. The intc_evt2irq_table and related intc_evt2irq() have been removed and the "redirecting" handler is installed for the coupled interrupts. Thanks to that, the do_IRQ() function doesn't have to use another level of indirection for all the interrupts...
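Condensed from the intc.c hunks below, the redirect pattern as a standalone sketch (helper names here are illustrative; it assumes the set_irq_data()/get_irq_data() genirq API of this era):

#include <linux/irq.h>

/* Secondary vectors that share an enum_id no longer go through a
 * translation table; each gets a trivial flow handler that forwards
 * to the group's primary irq. */
static void redirect_irq(unsigned int irq, struct irq_desc *desc)
{
	/* the primary irq number was stashed via set_irq_data() */
	generic_handle_irq((unsigned int)get_irq_data(irq));
}

static void couple_vector(struct irq_chip *chip,
			  unsigned int secondary, unsigned int primary)
{
	set_irq_chip_and_handler_name(secondary, chip,
				      redirect_irq, "redirect");
	/* sh is 32-bit, so storing the irq number in the pointer is safe */
	set_irq_data(secondary, (void *)primary);
}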
Signed-off-by: Pawel Moll Signed-off-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/kernel/irq.c | 2 +- drivers/sh/intc.c | 54 ++++++++++++++++--------------------------------- include/linux/sh_intc.h | 1 - 3 files changed, 18 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 278c68c60488..d1053392e287 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -114,7 +114,7 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs) #endif irq_enter(); - irq = irq_demux(intc_evt2irq(irq)); + irq = irq_demux(evt2irq(irq)); #ifdef CONFIG_IRQSTACKS curctx = (union irq_ctx *)current_thread_info(); diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c index 4b1ca9d28353..a9174ec72853 100644 --- a/drivers/sh/intc.c +++ b/drivers/sh/intc.c @@ -663,16 +663,9 @@ static unsigned int __init save_reg(struct intc_desc_int *d, return 0; } -static unsigned char *intc_evt2irq_table; - -unsigned int intc_evt2irq(unsigned int vector) +static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc) { - unsigned int irq = evt2irq(vector); - - if (intc_evt2irq_table && intc_evt2irq_table[irq]) - irq = intc_evt2irq_table[irq]; - - return irq; + generic_handle_irq((unsigned int)get_irq_data(irq)); } void __init register_intc_controller(struct intc_desc *desc) @@ -745,34 +738,6 @@ void __init register_intc_controller(struct intc_desc *desc) BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */ - /* keep the first vector only if same enum is used multiple times */ - for (i = 0; i < desc->nr_vectors; i++) { - struct intc_vect *vect = desc->vectors + i; - int first_irq = evt2irq(vect->vect); - - if (!vect->enum_id) - continue; - - for (k = i + 1; k < desc->nr_vectors; k++) { - struct intc_vect *vect2 = desc->vectors + k; - - if (vect->enum_id != vect2->enum_id) - continue; - - vect2->enum_id = 0; - - if (!intc_evt2irq_table) - intc_evt2irq_table = kzalloc(NR_IRQS, GFP_NOWAIT); - - if (!intc_evt2irq_table) { - pr_warning("intc: cannot allocate evt2irq!\n"); - continue; - } - - intc_evt2irq_table[evt2irq(vect2->vect)] = first_irq; - } - } - /* register the vectors one by one */ for (i = 0; i < desc->nr_vectors; i++) { struct intc_vect *vect = desc->vectors + i; @@ -789,6 +754,21 @@ void __init register_intc_controller(struct intc_desc *desc) } intc_register_irq(desc, d, vect->enum_id, irq); + + for (k = i + 1; k < desc->nr_vectors; k++) { + struct intc_vect *vect2 = desc->vectors + k; + unsigned int irq2 = evt2irq(vect2->vect); + + if (vect->enum_id != vect2->enum_id) + continue; + + vect2->enum_id = 0; + + /* redirect this interrupts to the first one */ + set_irq_chip_and_handler_name(irq2, &d->chip, + intc_redirect_irq, "redirect"); + set_irq_data(irq2, (void *)irq); + } } } diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index eb1423a0078d..68e212ff9dde 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -85,7 +85,6 @@ struct intc_desc symbol __initdata = { \ } #endif -unsigned int intc_evt2irq(unsigned int vector); void __init register_intc_controller(struct intc_desc *desc); int intc_set_priority(unsigned int irq, unsigned int prio); -- cgit v1.2.3 From 0396c215f301e92677d1e9a064b405e31501dc1d Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 25 Aug 2009 16:41:06 +0100 Subject: uwb: avoid radio controller reset loops If a radio controller reset attempt occurs while a probe() or remove() is in progress it fails and is retried endlessly, potentially preventing the probe() or 
remove() from completing. If a reset fails, sleep for a bit before retrying the reset. This allows the probe()/remove() to complete. Signed-off-by: David Vrabel --- drivers/uwb/hwa-rc.c | 3 +-- drivers/uwb/reset.c | 21 +++++++++++---------- drivers/uwb/umc-bus.c | 2 +- drivers/uwb/whc-rc.c | 3 +-- include/linux/uwb.h | 2 +- 5 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 9052bcb4f528..e7eeb63fab23 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -887,8 +887,7 @@ static int hwarc_post_reset(struct usb_interface *iface) struct hwarc *hwarc = usb_get_intfdata(iface); struct uwb_rc *uwb_rc = hwarc->uwb_rc; - uwb_rc_post_reset(uwb_rc); - return 0; + return uwb_rc_post_reset(uwb_rc); } /** USB device ID's that we handle */ diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c index 70f8050221ff..7f0512e43d9d 100644 --- a/drivers/uwb/reset.c +++ b/drivers/uwb/reset.c @@ -30,6 +30,7 @@ */ #include #include +#include #include "uwb-internal.h" @@ -323,13 +324,15 @@ int uwbd_msg_handle_reset(struct uwb_event *evt) dev_info(&rc->uwb_dev.dev, "resetting radio controller\n"); ret = rc->reset(rc); - if (ret) { + if (ret < 0) { dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret); goto error; } return 0; error: - /* Nothing can be done except try the reset again. */ + /* Nothing can be done except try the reset again. Wait a bit + to avoid reset loops during probe() or remove(). */ + msleep(1000); uwb_rc_reset_all(rc); return ret; } @@ -368,22 +371,20 @@ void uwb_rc_pre_reset(struct uwb_rc *rc) } EXPORT_SYMBOL_GPL(uwb_rc_pre_reset); -void uwb_rc_post_reset(struct uwb_rc *rc) +int uwb_rc_post_reset(struct uwb_rc *rc) { int ret; ret = rc->start(rc); if (ret) - goto error; + goto out; ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr); if (ret) - goto error; + goto out; ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr); if (ret) - goto error; - return; -error: - /* Nothing can be done except try the reset again. 
*/ - uwb_rc_reset_all(rc); + goto out; +out: + return ret; } EXPORT_SYMBOL_GPL(uwb_rc_post_reset); diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index 5ad36164c13b..cdd6c8efc9f8 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -66,7 +66,7 @@ int umc_controller_reset(struct umc_dev *umc) return -EAGAIN; ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper); if (ret >= 0) - device_for_each_child(parent, parent, umc_bus_post_reset_helper); + ret = device_for_each_child(parent, parent, umc_bus_post_reset_helper); up(&parent->sem); return ret; diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c index 19a1dd129212..1d9a6f54658e 100644 --- a/drivers/uwb/whc-rc.c +++ b/drivers/uwb/whc-rc.c @@ -443,8 +443,7 @@ static int whcrc_post_reset(struct umc_dev *umc) struct whcrc *whcrc = umc_get_drvdata(umc); struct uwb_rc *uwb_rc = whcrc->uwb_rc; - uwb_rc_post_reset(uwb_rc); - return 0; + return uwb_rc_post_reset(uwb_rc); } /* PCI device ID's that we handle [so it gets loaded] */ diff --git a/include/linux/uwb.h b/include/linux/uwb.h index c02128991ff7..7fc9746f22cd 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -597,7 +597,7 @@ void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t); void uwb_rc_neh_error(struct uwb_rc *, int); void uwb_rc_reset_all(struct uwb_rc *rc); void uwb_rc_pre_reset(struct uwb_rc *rc); -void uwb_rc_post_reset(struct uwb_rc *rc); +int uwb_rc_post_reset(struct uwb_rc *rc); /** * uwb_rsv_is_owner - is the owner of this reservation the RC? -- cgit v1.2.3 From 9e36fda0b359d2a6ae039c3d7e71a04502a77898 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:35 -0700 Subject: x86, pat: Add PAT reserve free to io_mapping* APIs io_mapping_* interfaces were added, mainly for graphics drivers. Make this interface go through the PAT reserve/free, instead of hardcoding WC mapping. This makes sure that there are no aliases due to unconditional WC setting. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/iomap.h | 9 ++++++--- arch/x86/mm/iomap_32.c | 27 +++++++++++++++++++++++++-- include/linux/io-mapping.h | 17 ++++++++++++----- 3 files changed, 43 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 0e9fe1d9d971..f35eb45d6576 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -26,13 +26,16 @@ #include #include -int -is_io_mapping_possible(resource_size_t base, unsigned long size); - void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void iounmap_atomic(void *kvaddr, enum km_type type); +int +iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); + +void +iomap_free(resource_size_t base, unsigned long size); + #endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,7 +21,7 @@ #include #include -int is_io_mapping_possible(resource_size_t base, unsigned long size) +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) /* There is no way to map greater than 1 << 32 address without PAE */ @@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) #endif return 1; } -EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) +{ + unsigned long flag = _PAGE_CACHE_WC; + int ret; + + if (!is_io_mapping_possible(base, size)) + return -EINVAL; + + ret = io_reserve_memtype(base, base + size, &flag); + if (ret) + return ret; + + *prot = __pgprot(__PAGE_KERNEL | flag); + return 0; +} +EXPORT_SYMBOL_GPL(iomap_create_wc); + +void +iomap_free(resource_size_t base, unsigned long size) +{ + io_free_memtype(base, base + size); +} +EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0adb0f91568c..97eb928b4924 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,23 +49,30 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; - - if (!is_io_mapping_possible(base, size)) - return NULL; + pgprot_t prot; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); if (!iomap) - return NULL; + goto out_err; + + if (iomap_create_wc(base, size, &prot)) + goto out_free; iomap->base = base; iomap->size = size; - iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + iomap->prot = prot; return iomap; + +out_free: + kfree(iomap); +out_err: + return NULL; } static inline void io_mapping_free(struct io_mapping *mapping) { + iomap_free(mapping->base, mapping->size); kfree(mapping); } -- cgit v1.2.3 From 46cf98cdaef5471926010b5bddf84c44ec177fdd Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:37 -0700 Subject: x86, pat: Generalize the use of page flag PG_uncached Only IA64 was using PG_uncached as of now. We now intend to use this bit in x86 as well, to keep track of memory type of those addresses that have page struct for them. So, generalize the use of that bit across ia64 and x86. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/ia64/Kconfig | 4 ++++ arch/x86/Kconfig | 4 ++++ include/linux/page-flags.h | 4 ++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..e6246119932a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -112,6 +112,10 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR +config ARCH_USES_PG_UNCACHED + def_bool y + depends on IA64_UNCACHED_ALLOCATOR + config AUDIT_ARCH bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f72205909..8e1595382196 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1414,6 +1414,10 @@ config X86_PAT If unsure, say Y. +config ARCH_USES_PG_UNCACHED + def_bool y + depends on X86_PAT + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e2e5ce543595..2b87acfc5f87 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -99,7 +99,7 @@ enum pageflags { #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT PG_mlocked, /* Page is vma mlocked */ #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif __NR_PAGEFLAGS, @@ -257,7 +257,7 @@ PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached) #else PAGEFLAG_FALSE(Uncached) -- cgit v1.2.3 From c9c97b8c75019814d8c007059bc827bb475be917 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Aug 2009 09:53:47 +1000 Subject: drm/ttm: consolidate cache flushing code in one place. This merges the TTM and drm cache flushing into one file in the drm core. 
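From a driver's point of view the consolidation is a one-call interface; a hedged sketch (the wrapper below is hypothetical, only drm_clflush_pages() comes from this patch):

#include <linux/mm_types.h>
#include "drm_cache.h"

/* CPU-written pages are flushed before a non-coherent GPU reads them.
 * The shared helper picks clflush, a wbinvd IPI, or a powerpc dcache
 * flush as appropriate, so drivers (and TTM) no longer carry private
 * copies of that logic. */
static void example_prepare_pages_for_gpu(struct page **pages,
					  unsigned long num_pages)
{
	drm_clflush_pages(pages, num_pages);
}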
Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_cache.c | 51 +++++++++++++++++++++++++++------ drivers/gpu/drm/ttm/ttm_tt.c | 67 ++------------------------------------------ include/drm/drm_cache.h | 38 +++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 74 deletions(-) create mode 100644 include/drm/drm_cache.h (limited to 'include') diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index 0e994a0e46d4..3a5575e638db 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -45,25 +45,58 @@ drm_clflush_page(struct page *page) clflush(page_virtual + i); kunmap_atomic(page_virtual, KM_USER0); } -#endif +static void drm_cache_flush_clflush(struct page *pages[], + unsigned long num_pages) +{ + unsigned long i; + + mb(); + for (i = 0; i < num_pages; i++) + drm_clflush_page(*pages++); + mb(); +} + +static void +drm_clflush_ipi_handler(void *null) +{ + wbinvd(); +} +#elif !defined(__powerpc__) +static void drm_cache_ipi_handler(void *dummy) +{ +} +#endif void drm_clflush_pages(struct page *pages[], unsigned long num_pages) { #if defined(CONFIG_X86) if (cpu_has_clflush) { - unsigned long i; - - mb(); - for (i = 0; i < num_pages; ++i) - drm_clflush_page(*pages++); - mb(); - + drm_cache_flush_clflush(pages, num_pages); return; } - wbinvd(); + if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for cache flush.\n"); + +#elif defined(__powerpc__) + unsigned long i; + for (i = 0; i < num_pages; i++) { + struct page *page = pages[i]; + void *page_virtual; + + if (unlikely(page == NULL)) + continue; + + page_virtual = kmap_atomic(page, KM_USER0); + flush_dcache_range((unsigned long)page_virtual, + (unsigned long)page_virtual + PAGE_SIZE); + kunmap_atomic(page_virtual, KM_USER0); + } +#else + if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for drm cache flush\n"); #endif } EXPORT_SYMBOL(drm_clflush_pages); diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 42cca5519761..a55ee1a56c16 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -34,76 +34,13 @@ #include #include #include +#include "drm_cache.h" #include "ttm/ttm_module.h" #include "ttm/ttm_bo_driver.h" #include "ttm/ttm_placement.h" static int ttm_tt_swapin(struct ttm_tt *ttm); -#if defined(CONFIG_X86) -static void ttm_tt_clflush_page(struct page *page) -{ - uint8_t *page_virtual; - unsigned int i; - - if (unlikely(page == NULL)) - return; - - page_virtual = kmap_atomic(page, KM_USER0); - - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - clflush(page_virtual + i); - - kunmap_atomic(page_virtual, KM_USER0); -} - -static void ttm_tt_cache_flush_clflush(struct page *pages[], - unsigned long num_pages) -{ - unsigned long i; - - mb(); - for (i = 0; i < num_pages; ++i) - ttm_tt_clflush_page(*pages++); - mb(); -} -#elif !defined(__powerpc__) -static void ttm_tt_ipi_handler(void *null) -{ - ; -} -#endif - -void ttm_tt_cache_flush(struct page *pages[], unsigned long num_pages) -{ - -#if defined(CONFIG_X86) - if (cpu_has_clflush) { - ttm_tt_cache_flush_clflush(pages, num_pages); - return; - } -#elif defined(__powerpc__) - unsigned long i; - - for (i = 0; i < num_pages; ++i) { - struct page *page = pages[i]; - void *page_virtual; - - if (unlikely(page == NULL)) - continue; - - page_virtual = kmap_atomic(page, KM_USER0); - flush_dcache_range((unsigned long) page_virtual, - (unsigned long) page_virtual + PAGE_SIZE); - kunmap_atomic(page_virtual, 
KM_USER0); - } -#else - if (on_each_cpu(ttm_tt_ipi_handler, NULL, 1) != 0) - printk(KERN_ERR TTM_PFX - "Timed out waiting for drm cache flush.\n"); -#endif -} - /** * Allocates storage for pointers to the pages that back the ttm. * @@ -302,7 +239,7 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm, } if (ttm->caching_state == tt_cached) - ttm_tt_cache_flush(ttm->pages, ttm->num_pages); + drm_clflush_pages(ttm->pages, ttm->num_pages); for (i = 0; i < ttm->num_pages; ++i) { cur_page = ttm->pages[i]; diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h new file mode 100644 index 000000000000..7bfb063029d8 --- /dev/null +++ b/include/drm/drm_cache.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2009 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Authors: + * Dave Airlie + */ + +#ifndef _DRM_CACHE_H_ +#define _DRM_CACHE_H_ + +void drm_clflush_pages(struct page *pages[], unsigned long num_pages); + +#endif -- cgit v1.2.3 From a1a2d1d32250f6fcc317419e9dfb4a5a6946d2e6 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 23 Aug 2009 12:40:55 +0300 Subject: drm: GEM handles are u32, not int Several functions in the GEM kernel API used int as handle type, but user API has it __u32 which is also the intended type. Replace int with u32. Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 11 +++++------ drivers/gpu/drm/i915/i915_gem.c | 3 ++- include/drm/drmP.h | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index ffe8f4394d50..230c9ffdd5e9 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -164,7 +164,7 @@ EXPORT_SYMBOL(drm_gem_object_alloc); * Removes the mapping from handle to filp for this object. 
*/ static int -drm_gem_handle_delete(struct drm_file *filp, int handle) +drm_gem_handle_delete(struct drm_file *filp, u32 handle) { struct drm_device *dev; struct drm_gem_object *obj; @@ -207,7 +207,7 @@ drm_gem_handle_delete(struct drm_file *filp, int handle) int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, - int *handlep) + u32 *handlep) { int ret; @@ -221,7 +221,7 @@ again: /* do the allocation under our spinlock */ spin_lock(&file_priv->table_lock); - ret = idr_get_new_above(&file_priv->object_idr, obj, 1, handlep); + ret = idr_get_new_above(&file_priv->object_idr, obj, 1, (int *)handlep); spin_unlock(&file_priv->table_lock); if (ret == -EAGAIN) goto again; @@ -237,7 +237,7 @@ EXPORT_SYMBOL(drm_gem_handle_create); /** Returns a reference to the object named by the handle. */ struct drm_gem_object * drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, - int handle) + u32 handle) { struct drm_gem_object *obj; @@ -344,7 +344,7 @@ drm_gem_open_ioctl(struct drm_device *dev, void *data, struct drm_gem_open *args = data; struct drm_gem_object *obj; int ret; - int handle; + u32 handle; if (!(dev->driver->driver_features & DRIVER_GEM)) return -ENODEV; @@ -539,7 +539,6 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; vma->vm_ops = obj->dev->driver->gem_vm_ops; vma->vm_private_data = map->handle; - /* FIXME: use pgprot_writecombine when available */ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); /* Take a ref for this mapping of the object, so that the fault diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 140bee142fc2..0e6c9cca897c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -111,7 +111,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_create *args = data; struct drm_gem_object *obj; - int handle, ret; + int ret; + u32 handle; args->size = roundup(args->size, PAGE_SIZE); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index e0f1c1fee58b..eeefb6369e19 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1441,7 +1441,7 @@ drm_gem_object_unreference(struct drm_gem_object *obj) int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, - int *handlep); + u32 *handlep); static inline void drm_gem_object_handle_reference(struct drm_gem_object *obj) @@ -1467,7 +1467,7 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) struct drm_gem_object *drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, - int handle); + u32 handle); int drm_gem_close_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int drm_gem_flink_ioctl(struct drm_device *dev, void *data, -- cgit v1.2.3 From f8d80cdf40fe4d2393159012b38ce9f85a488686 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 2 Jun 2009 13:28:13 +0800 Subject: ACPICA: Remove duplicate extern declarations for public globals Some were defined twice, causes a warning with gcc -Wredundant-decls. 
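The idiom being restored is the usual single-definition pattern: declare a public global in exactly one place, and let exactly one translation unit turn the declarations into definitions. A sketch with made-up names (not the ACPICA ones):

/* hypothetical_globals.h */
#ifdef DEFINE_GLOBALS			/* set by exactly one .c file */
unsigned int example_trace_flags;	/* the one real definition */
#else
extern unsigned int example_trace_flags;	/* declaration elsewhere */
#endif

/* hypothetical_globals.c */
#define DEFINE_GLOBALS
#include "hypothetical_globals.h"

Declaring the same object a second time in another header is what -Wredundant-decls flags, and what this patch removes.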
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 33 ++++++++++++++++++--------------- include/acpi/acpixf.h | 1 + 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 3d87362d17ed..0b73b31c1b53 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -58,6 +58,10 @@ #define ACPI_INIT_GLOBAL(a,b) a #endif +#ifdef DEFINE_ACPI_GLOBALS + +/* Public globals, available from outside ACPICA subsystem */ + /***************************************************************************** * * Runtime configuration (static defaults that can be overriden at runtime) @@ -78,7 +82,7 @@ * 5) Allow unresolved references (invalid target name) in package objects * 6) Enable warning messages for behavior that is not ACPI spec compliant */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); /* * Automatically serialize ALL control methods? Default is FALSE, meaning @@ -86,27 +90,36 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); * Only change this if the ASL code is poorly written and cannot handle * reentrancy even though methods are marked "NotSerialized". */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE); /* * Create the predefined _OSI method in the namespace? Default is TRUE * because ACPI CA is fully compatible with other ACPI implementations. * Changing this will revert ACPI CA (and machine ASL) to pre-OSI behavior. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE); /* * Disable wakeup GPEs during runtime? Default is TRUE because WAKE and * RUNTIME GPEs should never be shared, and WAKE GPEs should typically only * be enabled just before going to sleep. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE); /* * Optionally use default values for the ACPI register widths. Set this to * TRUE to use the defaults, if an FADT contains incorrect widths/lengths. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); + +/* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. 
*/ + +struct acpi_table_fadt acpi_gbl_FADT; +u32 acpi_current_gpe_count; +u32 acpi_gbl_trace_flags; +acpi_name acpi_gbl_trace_method_name; + +#endif /***************************************************************************** * @@ -114,11 +127,6 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); * ****************************************************************************/ -/* Runtime configuration of debug print levels */ - -extern u32 acpi_dbg_level; -extern u32 acpi_dbg_layer; - /* Procedure nesting level for debug output */ extern u32 acpi_gbl_nesting_level; @@ -127,10 +135,8 @@ extern u32 acpi_gbl_nesting_level; ACPI_EXTERN u32 acpi_gbl_original_dbg_level; ACPI_EXTERN u32 acpi_gbl_original_dbg_layer; -ACPI_EXTERN acpi_name acpi_gbl_trace_method_name; ACPI_EXTERN u32 acpi_gbl_trace_dbg_level; ACPI_EXTERN u32 acpi_gbl_trace_dbg_layer; -ACPI_EXTERN u32 acpi_gbl_trace_flags; /***************************************************************************** * @@ -142,10 +148,8 @@ ACPI_EXTERN u32 acpi_gbl_trace_flags; * acpi_gbl_root_table_list is the master list of ACPI tables found in the * RSDT/XSDT. * - * acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list; -ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT; ACPI_EXTERN struct acpi_table_facs *acpi_gbl_FACS; /* These addresses are calculated from the FADT Event Block addresses */ @@ -340,7 +344,6 @@ ACPI_EXTERN struct acpi_fixed_event_handler ACPI_EXTERN struct acpi_gpe_xrupt_info *acpi_gbl_gpe_xrupt_list_head; ACPI_EXTERN struct acpi_gpe_block_info *acpi_gbl_gpe_fadt_blocks[ACPI_MAX_GPE_BLOCKS]; -ACPI_EXTERN u32 acpi_current_gpe_count; /***************************************************************************** * diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 82ec6a3c0500..2aecaa5cc06c 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -64,6 +64,7 @@ extern u8 acpi_gbl_enable_interpreter_slack; extern u8 acpi_gbl_all_methods_serialized; extern u8 acpi_gbl_create_osi_method; extern u8 acpi_gbl_leave_wake_gpes_disabled; +extern u8 acpi_gbl_use_default_register_widths; extern acpi_name acpi_gbl_trace_method_name; extern u32 acpi_gbl_trace_flags; -- cgit v1.2.3 From c6b5774caafa4c12b6019366e2fdaaff117e95a4 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 24 Jun 2009 09:44:06 +0800 Subject: ACPICA: Add 64-bit support to acpi_read and acpi_write Needed by drivers for new ACPi tables. Internal versions of these functions still use 32-bit max transfers, in order to minimize disruption and stack use for the standard ACPI registers (FADT-based). 
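A caller's-eye sketch of the widened public interface (the wrapper is hypothetical; acpi_read()'s u64 prototype is what this patch introduces):

#include <acpi/acpi.h>

/* With the widened prototype a 64-bit GAS register, e.g. from a new
 * ACPI table, is read in a single call; for bit_width == 64 the
 * implementation splits the access into two 32-bit transfers, as the
 * hwxface.c hunks below show. */
static acpi_status example_read_gas(struct acpi_generic_address *reg,
				    u64 *value)
{
	return acpi_read(value, reg);
}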
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/achware.h | 8 ++ drivers/acpi/acpica/evgpe.c | 8 +- drivers/acpi/acpica/evgpeblk.c | 4 +- drivers/acpi/acpica/hwgpe.c | 34 +++---- drivers/acpi/acpica/hwregs.c | 206 ++++++++++++++++++++++++++++++++++++++--- drivers/acpi/acpica/hwtimer.c | 2 +- drivers/acpi/acpica/hwxface.c | 166 +++++++++++++++++++-------------- include/acpi/acpixf.h | 4 +- 8 files changed, 327 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index 4afa3d8e0efb..36192f142fbb 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -62,6 +62,14 @@ u32 acpi_hw_get_mode(void); /* * hwregs - ACPI Register I/O */ +acpi_status +acpi_hw_validate_register(struct acpi_generic_address *reg, + u8 max_bit_width, u64 *address); + +acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg); + +acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg); + struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id); acpi_status acpi_hw_write_pm1_control(u32 pm1a_control, u32 pm1b_control); diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index b9d8ee69ca6c..afacf4416c73 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -424,8 +424,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) /* Read the Status Register */ status = - acpi_read(&status_reg, - &gpe_register_info->status_address); + acpi_hw_read(&status_reg, + &gpe_register_info->status_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } @@ -433,8 +433,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) /* Read the Enable Register */ status = - acpi_read(&enable_reg, - &gpe_register_info->enable_address); + acpi_hw_read(&enable_reg, + &gpe_register_info->enable_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c index 7b3463639422..a60aaa7635f3 100644 --- a/drivers/acpi/acpica/evgpeblk.c +++ b/drivers/acpi/acpica/evgpeblk.c @@ -843,14 +843,14 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block) /* Disable all GPEs within this register */ - status = acpi_write(0x00, &this_register->enable_address); + status = acpi_hw_write(0x00, &this_register->enable_address); if (ACPI_FAILURE(status)) { goto error_exit; } /* Clear any pending GPE events within this register */ - status = acpi_write(0xFF, &this_register->status_address); + status = acpi_hw_write(0xFF, &this_register->status_address); if (ACPI_FAILURE(status)) { goto error_exit; } diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c index d3b7e37c9eed..c28c41b3180b 100644 --- a/drivers/acpi/acpica/hwgpe.c +++ b/drivers/acpi/acpica/hwgpe.c @@ -82,7 +82,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) /* Get current value of the enable register that contains this GPE */ - status = acpi_read(&enable_mask, &gpe_register_info->enable_address); + status = acpi_hw_read(&enable_mask, &gpe_register_info->enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -95,7 +95,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) /* Write the updated enable mask */ - status = acpi_write(enable_mask, &gpe_register_info->enable_address); + status = acpi_hw_write(enable_mask, &gpe_register_info->enable_address); 
return (status); } @@ -130,8 +130,8 @@ acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info) /* Write the entire GPE (runtime) enable register */ - status = acpi_write(gpe_register_info->enable_for_run, - &gpe_register_info->enable_address); + status = acpi_hw_write(gpe_register_info->enable_for_run, + &gpe_register_info->enable_address); return (status); } @@ -163,8 +163,8 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info) * Write a one to the appropriate bit in the status register to * clear this GPE. */ - status = acpi_write(register_bit, - &gpe_event_info->register_info->status_address); + status = acpi_hw_write(register_bit, + &gpe_event_info->register_info->status_address); return (status); } @@ -222,7 +222,7 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info, /* GPE currently active (status bit == 1)? */ - status = acpi_read(&in_byte, &gpe_register_info->status_address); + status = acpi_hw_read(&in_byte, &gpe_register_info->status_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } @@ -266,8 +266,8 @@ acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Disable all GPEs in this register */ status = - acpi_write(0x00, - &gpe_block->register_info[i].enable_address); + acpi_hw_write(0x00, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -303,8 +303,8 @@ acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Clear status on all GPEs in this register */ status = - acpi_write(0xFF, - &gpe_block->register_info[i].status_address); + acpi_hw_write(0xFF, + &gpe_block->register_info[i].status_address); if (ACPI_FAILURE(status)) { return (status); } @@ -345,9 +345,9 @@ acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Enable all "runtime" GPEs in this register */ - status = acpi_write(gpe_block->register_info[i].enable_for_run, - &gpe_block->register_info[i]. - enable_address); + status = + acpi_hw_write(gpe_block->register_info[i].enable_for_run, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -387,9 +387,9 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Enable all "wake" GPEs in this register */ - status = acpi_write(gpe_block->register_info[i].enable_for_wake, - &gpe_block->register_info[i]. - enable_address); + status = + acpi_hw_write(gpe_block->register_info[i].enable_for_wake, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index 23d5505cb1f7..15c9ed2be853 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -62,6 +62,184 @@ acpi_hw_write_multiple(u32 value, struct acpi_generic_address *register_a, struct acpi_generic_address *register_b); +/****************************************************************************** + * + * FUNCTION: acpi_hw_validate_register + * + * PARAMETERS: Reg - GAS register structure + * max_bit_width - Max bit_width supported (32 or 64) + * Address - Pointer to where the gas->address + * is returned + * + * RETURN: Status + * + * DESCRIPTION: Validate the contents of a GAS register. Checks the GAS + * pointer, Address, space_id, bit_width, and bit_offset. 
+ * + ******************************************************************************/ + +acpi_status +acpi_hw_validate_register(struct acpi_generic_address *reg, + u8 max_bit_width, u64 *address) +{ + + /* Must have a valid pointer to a GAS structure */ + + if (!reg) { + return (AE_BAD_PARAMETER); + } + + /* + * Copy the target address. This handles possible alignment issues. + * Address must not be null. A null address also indicates an optional + * ACPI register that is not supported, so no error message. + */ + ACPI_MOVE_64_TO_64(address, ®->address); + if (!(*address)) { + return (AE_BAD_ADDRESS); + } + + /* Validate the space_iD */ + + if ((reg->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) && + (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { + ACPI_ERROR((AE_INFO, + "Unsupported address space: 0x%X", reg->space_id)); + return (AE_SUPPORT); + } + + /* Validate the bit_width */ + + if ((reg->bit_width != 8) && + (reg->bit_width != 16) && + (reg->bit_width != 32) && (reg->bit_width != max_bit_width)) { + ACPI_ERROR((AE_INFO, + "Unsupported register bit width: 0x%X", + reg->bit_width)); + return (AE_SUPPORT); + } + + /* Validate the bit_offset. Just a warning for now. */ + + if (reg->bit_offset != 0) { + ACPI_WARNING((AE_INFO, + "Unsupported register bit offset: 0x%X", + reg->bit_offset)); + } + + return (AE_OK); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_read + * + * PARAMETERS: Value - Where the value is returned + * Reg - GAS register structure + * + * RETURN: Status + * + * DESCRIPTION: Read from either memory or IO space. This is a 32-bit max + * version of acpi_read, used internally since the overhead of + * 64-bit values is not needed. + * + * LIMITATIONS: + * bit_width must be exactly 8, 16, or 32. + * space_iD must be system_memory or system_iO. + * bit_offset and access_width are currently ignored, as there has + * not been a need to implement these. + * + ******************************************************************************/ + +acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg) +{ + u64 address; + acpi_status status; + + ACPI_FUNCTION_NAME(hw_read); + + /* Validate contents of the GAS register */ + + status = acpi_hw_validate_register(reg, 32, &address); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* Initialize entire 32-bit return value to zero */ + + *value = 0; + + /* + * Two address spaces supported: Memory or IO. PCI_Config is + * not supported here because the GAS structure is insufficient + */ + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_read_memory((acpi_physical_address) + address, value, reg->bit_width); + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ + + status = acpi_hw_read_port((acpi_io_address) + address, value, reg->bit_width); + } + + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n", + *value, reg->bit_width, ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(reg->space_id))); + + return (status); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_write + * + * PARAMETERS: Value - Value to be written + * Reg - GAS register structure + * + * RETURN: Status + * + * DESCRIPTION: Write to either memory or IO space. This is a 32-bit max + * version of acpi_write, used internally since the overhead of + * 64-bit values is not needed. 
+ * + ******************************************************************************/ + +acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg) +{ + u64 address; + acpi_status status; + + ACPI_FUNCTION_NAME(hw_write); + + /* Validate contents of the GAS register */ + + status = acpi_hw_validate_register(reg, 32, &address); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* + * Two address spaces supported: Memory or IO. PCI_Config is + * not supported here because the GAS structure is insufficient + */ + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_write_memory((acpi_physical_address) + address, value, reg->bit_width); + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ + + status = acpi_hw_write_port((acpi_io_address) + address, value, reg->bit_width); + } + + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n", + value, reg->bit_width, ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(reg->space_id))); + + return (status); +} + /******************************************************************************* * * FUNCTION: acpi_hw_clear_acpi_status @@ -152,15 +330,16 @@ acpi_status acpi_hw_write_pm1_control(u32 pm1a_control, u32 pm1b_control) ACPI_FUNCTION_TRACE(hw_write_pm1_control); - status = acpi_write(pm1a_control, &acpi_gbl_FADT.xpm1a_control_block); + status = + acpi_hw_write(pm1a_control, &acpi_gbl_FADT.xpm1a_control_block); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } if (acpi_gbl_FADT.xpm1b_control_block.address) { status = - acpi_write(pm1b_control, - &acpi_gbl_FADT.xpm1b_control_block); + acpi_hw_write(pm1b_control, + &acpi_gbl_FADT.xpm1b_control_block); } return_ACPI_STATUS(status); } @@ -218,12 +397,13 @@ acpi_hw_register_read(u32 register_id, u32 * return_value) case ACPI_REGISTER_PM2_CONTROL: /* 8-bit access */ - status = acpi_read(&value, &acpi_gbl_FADT.xpm2_control_block); + status = + acpi_hw_read(&value, &acpi_gbl_FADT.xpm2_control_block); break; case ACPI_REGISTER_PM_TIMER: /* 32-bit access */ - status = acpi_read(&value, &acpi_gbl_FADT.xpm_timer_block); + status = acpi_hw_read(&value, &acpi_gbl_FADT.xpm_timer_block); break; case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */ @@ -340,7 +520,8 @@ acpi_status acpi_hw_register_write(u32 register_id, u32 value) * as per the ACPI spec. 
	 */
 		status =
-		    acpi_read(&read_value, &acpi_gbl_FADT.xpm2_control_block);
+		    acpi_hw_read(&read_value,
+				 &acpi_gbl_FADT.xpm2_control_block);
 		if (ACPI_FAILURE(status)) {
 			goto exit;
 		}
@@ -350,12 +531,13 @@ acpi_status acpi_hw_register_write(u32 register_id, u32 value)
 		ACPI_INSERT_BITS(value, ACPI_PM2_CONTROL_PRESERVED_BITS,
 				 read_value);
 
-		status = acpi_write(value, &acpi_gbl_FADT.xpm2_control_block);
+		status =
+		    acpi_hw_write(value, &acpi_gbl_FADT.xpm2_control_block);
 		break;
 
 	case ACPI_REGISTER_PM_TIMER:	/* 32-bit access */
 
-		status = acpi_write(value, &acpi_gbl_FADT.xpm_timer_block);
+		status = acpi_hw_write(value, &acpi_gbl_FADT.xpm_timer_block);
 		break;
 
 	case ACPI_REGISTER_SMI_COMMAND_BLOCK:	/* 8-bit access */
@@ -401,7 +583,7 @@ acpi_hw_read_multiple(u32 *value,
 
 	/* The first register is always required */
 
-	status = acpi_read(&value_a, register_a);
+	status = acpi_hw_read(&value_a, register_a);
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
@@ -409,7 +591,7 @@ acpi_hw_read_multiple(u32 *value,
 	/* Second register is optional */
 
 	if (register_b->address) {
-		status = acpi_read(&value_b, register_b);
+		status = acpi_hw_read(&value_b, register_b);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
@@ -452,7 +634,7 @@ acpi_hw_write_multiple(u32 value,
 
 	/* The first register is always required */
 
-	status = acpi_write(value, register_a);
+	status = acpi_hw_write(value, register_a);
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
@@ -470,7 +652,7 @@ acpi_hw_write_multiple(u32 value,
 	 * and writes have no side effects"
 	 */
 	if (register_b->address) {
-		status = acpi_write(value, register_b);
+		status = acpi_hw_write(value, register_b);
 	}
 
 	return (status);
diff --git a/drivers/acpi/acpica/hwtimer.c b/drivers/acpi/acpica/hwtimer.c
index b7f522c8f023..6b282e85d039 100644
--- a/drivers/acpi/acpica/hwtimer.c
+++ b/drivers/acpi/acpica/hwtimer.c
@@ -100,7 +100,7 @@ acpi_status acpi_get_timer(u32 * ticks)
 	}
 
 	status =
-	    acpi_hw_low_level_read(32, ticks, &acpi_gbl_FADT.xpm_timer_block);
+	    acpi_hw_read(ticks, &acpi_gbl_FADT.xpm_timer_block);
 
 	return_ACPI_STATUS(status);
 }
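ACPI allows the PM1 event and control register sets to be split into A and B
hardware blocks; acpi_hw_read_multiple() and acpi_hw_write_multiple() above
present such a pair as one logical register. A condensed sketch of the read
side follows; the final OR-combine does not appear in the hunk context shown,
but ORing the two halves is the spec-defined semantic for split PM1 registers:

	static acpi_status
	pm1_logical_read(u32 *value,
			 struct acpi_generic_address *register_a,
			 struct acpi_generic_address *register_b)
	{
		u32 value_a = 0;
		u32 value_b = 0;
		acpi_status status;

		/* The first (A) register is always required */

		status = acpi_hw_read(&value_a, register_a);
		if (ACPI_FAILURE(status)) {
			return (status);
		}

		/* The second (B) register is optional */

		if (register_b->address) {
			status = acpi_hw_read(&value_b, register_b);
			if (ACPI_FAILURE(status)) {
				return (status);
			}
		}

		*value = (value_a | value_b);
		return (AE_OK);
	}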
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index 9829979f2bdd..4ead85f29215 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -80,7 +80,7 @@ acpi_status acpi_reset(void)
 
 	/* Write the reset value to the reset register */
 
-	status = acpi_write(acpi_gbl_FADT.reset_value, reset_reg);
+	status = acpi_hw_write(acpi_gbl_FADT.reset_value, reset_reg);
 	return_ACPI_STATUS(status);
 }
 
@@ -97,67 +97,92 @@ ACPI_EXPORT_SYMBOL(acpi_reset)
 *
 * DESCRIPTION: Read from either memory or IO space.
 *
+ * LIMITATIONS:
+ *      bit_width must be exactly 8, 16, 32, or 64.
+ *      space_id must be system_memory or system_io.
+ *      bit_offset and access_width are currently ignored, as there has
+ *      not been a need to implement these.
+ *
 ******************************************************************************/
-acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg)
+acpi_status acpi_read(u64 *return_value, struct acpi_generic_address *reg)
 {
+	u32 value;
 	u32 width;
 	u64 address;
 	acpi_status status;
 
 	ACPI_FUNCTION_NAME(acpi_read);
 
-	/*
-	 * Must have a valid pointer to a GAS structure, and a non-zero address
-	 * within.
-	 */
-	if (!reg) {
+	if (!return_value) {
 		return (AE_BAD_PARAMETER);
 	}
 
-	/* Get a local copy of the address. Handles possible alignment issues */
+	/* Validate contents of the GAS register. Allow 64-bit transfers */
 
-	ACPI_MOVE_64_TO_64(&address, &reg->address);
-	if (!address) {
-		return (AE_BAD_ADDRESS);
+	status = acpi_hw_validate_register(reg, 64, &address);
+	if (ACPI_FAILURE(status)) {
+		return (status);
 	}
 
-	/* Supported widths are 8/16/32 */
-
 	width = reg->bit_width;
-	if ((width != 8) && (width != 16) && (width != 32)) {
-		return (AE_SUPPORT);
+	if (width == 64) {
+		width = 32;	/* Break into two 32-bit transfers */
 	}
 
-	/* Initialize entire 32-bit return value to zero */
+	/* Initialize entire 64-bit return value to zero */
 
-	*value = 0;
+	*return_value = 0;
+	value = 0;
 
 	/*
 	 * Two address spaces supported: Memory or IO. PCI_Config is
 	 * not supported here because the GAS structure is insufficient
 	 */
-	switch (reg->space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_read_memory((acpi_physical_address)
+					     address, &value, width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
+		*return_value = value;
+
+		if (reg->bit_width == 64) {
 
-		status = acpi_os_read_memory((acpi_physical_address) address,
-					     value, width);
-		break;
+			/* Read the top 32 bits */
 
-	case ACPI_ADR_SPACE_SYSTEM_IO:
+			status = acpi_os_read_memory((acpi_physical_address)
+						     (address + 4), &value, 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+			*return_value |= ((u64)value << 32);
+		}
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
 
-		status =
-		    acpi_hw_read_port((acpi_io_address) address, value, width);
-		break;
+		status = acpi_hw_read_port((acpi_io_address)
+					   address, &value, width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
+		*return_value = value;
 
-	default:
-		ACPI_ERROR((AE_INFO,
-			    "Unsupported address space: %X", reg->space_id));
-		return (AE_BAD_PARAMETER);
+		if (reg->bit_width == 64) {
+
+			/* Read the top 32 bits */
+
+			status = acpi_hw_read_port((acpi_io_address)
+						   (address + 4), &value, 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+			*return_value |= ((u64)value << 32);
+		}
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
-			  "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n",
-			  *value, width, ACPI_FORMAT_UINT64(address),
+			  "Read: %8.8X%8.8X width %2d from %8.8X%8.8X (%s)\n",
+			  ACPI_FORMAT_UINT64(*return_value), reg->bit_width,
+			  ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
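A caller-side sketch of the widened interface; the register definition and
MMIO address here are hypothetical:

	static acpi_status read_counter(u64 *count)
	{
		struct acpi_generic_address reg = {
			.space_id  = ACPI_ADR_SPACE_SYSTEM_MEMORY,
			.bit_width = 64,		/* hypothetical 64-bit register */
			.address   = 0xfed00000,	/* hypothetical MMIO address */
		};

		/* acpi_hw_validate_register() runs first: a null address or an
		   unsupported space_id/bit_width fails before any access */
		return acpi_read(count, &reg);
	}

With bit_width == 64 the access is split into two 32-bit transfers, so the
underlying acpi_os_read_memory()/acpi_hw_read_port() primitives never see a
64-bit width.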
@@ -169,7 +194,7 @@ ACPI_EXPORT_SYMBOL(acpi_read)
 *
 * FUNCTION:    acpi_write
 *
- * PARAMETERS:  Value               - To be written
+ * PARAMETERS:  Value               - Value to be written
 *              Reg                 - GAS register structure
 *
 * RETURN:      Status
@@ -177,7 +202,7 @@ ACPI_EXPORT_SYMBOL(acpi_read)
 * DESCRIPTION: Write to either memory or IO space.
 *
 ******************************************************************************/
-acpi_status acpi_write(u32 value, struct acpi_generic_address *reg)
+acpi_status acpi_write(u64 value, struct acpi_generic_address *reg)
 {
 	u32 width;
 	u64 address;
@@ -185,54 +210,61 @@ acpi_status acpi_write(u32 value, struct acpi_generic_address *reg)
 
 	ACPI_FUNCTION_NAME(acpi_write);
 
-	/*
-	 * Must have a valid pointer to a GAS structure, and a non-zero address
-	 * within.
-	 */
-	if (!reg) {
-		return (AE_BAD_PARAMETER);
-	}
+	/* Validate contents of the GAS register. Allow 64-bit transfers */
 
-	/* Get a local copy of the address. Handles possible alignment issues */
-
-	ACPI_MOVE_64_TO_64(&address, &reg->address);
-	if (!address) {
-		return (AE_BAD_ADDRESS);
+	status = acpi_hw_validate_register(reg, 64, &address);
+	if (ACPI_FAILURE(status)) {
+		return (status);
 	}
 
-	/* Supported widths are 8/16/32 */
-
 	width = reg->bit_width;
-	if ((width != 8) && (width != 16) && (width != 32)) {
-		return (AE_SUPPORT);
+	if (width == 64) {
+		width = 32;	/* Break into two 32-bit transfers */
 	}
 
 	/*
-	 * Two address spaces supported: Memory or IO.
-	 * PCI_Config is not supported here because the GAS struct is insufficient
+	 * Two address spaces supported: Memory or IO. PCI_Config is
+	 * not supported here because the GAS structure is insufficient
 	 */
-	switch (reg->space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
-
-		status = acpi_os_write_memory((acpi_physical_address) address,
-					      value, width);
-		break;
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_write_memory((acpi_physical_address)
+					      address, ACPI_LODWORD(value),
+					      width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
 
-	case ACPI_ADR_SPACE_SYSTEM_IO:
+		if (reg->bit_width == 64) {
+			status = acpi_os_write_memory((acpi_physical_address)
+						      (address + 4),
+						      ACPI_HIDWORD(value), 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+		}
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
 
-		status = acpi_hw_write_port((acpi_io_address) address, value,
+		status = acpi_hw_write_port((acpi_io_address)
+					    address, ACPI_LODWORD(value),
 					    width);
-		break;
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
 
-	default:
-		ACPI_ERROR((AE_INFO,
-			    "Unsupported address space: %X", reg->space_id));
-		return (AE_BAD_PARAMETER);
+		if (reg->bit_width == 64) {
+			status = acpi_hw_write_port((acpi_io_address)
+						    (address + 4),
+						    ACPI_HIDWORD(value), 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+		}
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
-			  "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n",
-			  value, width, ACPI_FORMAT_UINT64(address),
+			  "Wrote: %8.8X%8.8X width %2d to %8.8X%8.8X (%s)\n",
+			  ACPI_FORMAT_UINT64(value), reg->bit_width,
+			  ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 2aecaa5cc06c..b450a195319a 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -360,9 +360,9 @@ acpi_status acpi_set_firmware_waking_vector(u32 physical_address);
 acpi_status acpi_set_firmware_waking_vector64(u64 physical_address);
 #endif
 
-acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg);
+acpi_status acpi_read(u64 *value, struct acpi_generic_address *reg);
 
-acpi_status acpi_write(u32 value, struct acpi_generic_address *reg);
+acpi_status acpi_write(u64 value, struct acpi_generic_address *reg);
 
 acpi_status
 acpi_get_sleep_type_data(u8 sleep_state, u8 * slp_typ_a, u8 * slp_typ_b);
-- cgit v1.2.3 

From 15b8dd53f5ffaf8e2d9095c423f713423f576c0f Mon Sep 17 00:00:00 2001
From: Bob Moore
Date: Mon, 29 Jun 2009 13:39:29 +0800
Subject: ACPICA: Major update for acpi_get_object_info external interface

Completed a major update for the acpi_get_object_info external interface.
Changes include:
- Support for variable, unlimited length HID, UID, and CID strings
- Support Processor objects the same as Devices (HID,UID,CID,ADR,STA, etc.)
- Call the _SxW power methods on behalf of a device object
- Determine if a device is a PCI root bridge
- Change the ACPI_BUFFER parameter to ACPI_DEVICE_INFO.
These changes will require an update to all callers of this interface.
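A minimal sketch of the new calling convention, mirroring the caller
conversions further below (the handle and the printk are illustrative):

	struct acpi_device_info *info;
	acpi_status status;

	status = acpi_get_object_info(handle, &info);
	if (ACPI_FAILURE(status))
		return status;

	if (info->valid & ACPI_VALID_HID)
		printk(KERN_DEBUG "HID: %s\n", info->hardware_id.string);

	kfree(info);	/* one allocation holds the struct and all ID strings */

The returned buffer embeds the CID array and every ID string, so a single
kfree() (ACPI_FREE() inside ACPICA) releases everything.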
See the ACPICA Programmer Reference for details. Also, update all invocations of acpi_get_object_info interface Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- arch/ia64/hp/common/sba_iommu.c | 7 +- drivers/acpi/acpi_memhotplug.c | 11 +- drivers/acpi/acpica/Makefile | 2 +- drivers/acpi/acpica/acconfig.h | 5 + drivers/acpi/acpica/acglobal.h | 3 +- drivers/acpi/acpica/acinterp.h | 4 +- drivers/acpi/acpica/acutils.h | 24 ++- drivers/acpi/acpica/evrgnini.c | 45 +---- drivers/acpi/acpica/exutils.c | 53 +++-- drivers/acpi/acpica/nsdumpdv.c | 7 +- drivers/acpi/acpica/nsxfeval.c | 23 ++- drivers/acpi/acpica/nsxfname.c | 237 +++++++++++++++++------ drivers/acpi/acpica/uteval.c | 375 ++++-------------------------------- drivers/acpi/acpica/utglobal.c | 10 +- drivers/acpi/acpica/utids.c | 382 +++++++++++++++++++++++++++++++++++++ drivers/acpi/acpica/utmisc.c | 28 +++ drivers/acpi/container.c | 11 +- drivers/acpi/dock.c | 8 +- drivers/acpi/glue.c | 6 +- drivers/acpi/scan.c | 153 +++++++++------ drivers/char/agp/hp-agp.c | 9 +- drivers/ide/ide-acpi.c | 5 +- drivers/pci/hotplug/acpiphp_ibm.c | 12 +- drivers/platform/x86/sony-laptop.c | 7 +- drivers/pnp/pnpacpi/core.c | 6 +- include/acpi/acpi_bus.h | 8 +- include/acpi/acpixf.h | 3 +- include/acpi/actypes.h | 87 +++++---- 28 files changed, 901 insertions(+), 630 deletions(-) create mode 100644 drivers/acpi/acpica/utids.c (limited to 'include') diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 8cfb001092ab..674a8374c6d9 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2026,24 +2026,21 @@ acpi_sba_ioc_add(struct acpi_device *device) struct ioc *ioc; acpi_status status; u64 hpa, length; - struct acpi_buffer buffer; struct acpi_device_info *dev_info; status = hp_acpi_csr_space(device->handle, &hpa, &length); if (ACPI_FAILURE(status)) return 1; - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(device->handle, &buffer); + status = acpi_get_object_info(device->handle, &dev_info); if (ACPI_FAILURE(status)) return 1; - dev_info = buffer.pointer; /* * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI * root bridges, and its CSR space includes the IOC function. 
*/ - if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) { + if (strncmp("HWP0001", dev_info->hardware_id.string, 7) == 0) { hpa += ZX1_IOC_OFFSET; /* zx1 based systems default to kernel page size iommu pages */ if (!iovp_shift) diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 9a62224cc278..80eacbe157e2 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -481,26 +481,23 @@ static acpi_status is_memory_device(acpi_handle handle) { char *hardware_id; acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_device_info *info; - - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) return status; - info = buffer.pointer; if (!(info->valid & ACPI_VALID_HID)) { - kfree(buffer.pointer); + kfree(info); return AE_ERROR; } - hardware_id = info->hardware_id.value; + hardware_id = info->hardware_id.string; if ((hardware_id == NULL) || (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID))) status = AE_ERROR; - kfree(buffer.pointer); + kfree(info); return status; } diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index 72ac28da14e3..0e7d56185f6d 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -44,4 +44,4 @@ acpi-y += tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o acpi-y += utalloc.o utdebug.o uteval.o utinit.o utmisc.o utxface.o \ utcopy.o utdelete.o utglobal.o utmath.o utobject.o \ - utstate.o utmutex.o utobject.o utresrc.o utlock.o + utstate.o utmutex.o utobject.o utresrc.o utlock.o utids.o diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index e6777fb883d2..6c1fb2d9f4d5 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -203,6 +203,11 @@ #define ACPI_SMBUS_BUFFER_SIZE 34 +/* _sx_d and _sx_w control methods */ + +#define ACPI_NUM_sx_d_METHODS 4 +#define ACPI_NUM_sx_w_METHODS 5 + /****************************************************************************** * * ACPI AML Debugger diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 0b73b31c1b53..6389f7c1de59 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -265,7 +265,8 @@ ACPI_EXTERN u8 acpi_gbl_osi_data; extern u8 acpi_gbl_shutdown; extern u32 acpi_gbl_startup_flags; extern const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT]; -extern const char *acpi_gbl_highest_dstate_names[4]; +extern const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS]; +extern const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS]; extern const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES]; extern const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS]; diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h index e8db7a3143a5..5db9f2916f7c 100644 --- a/drivers/acpi/acpica/acinterp.h +++ b/drivers/acpi/acpica/acinterp.h @@ -461,9 +461,9 @@ void acpi_ex_acquire_global_lock(u32 rule); void acpi_ex_release_global_lock(u32 rule); -void acpi_ex_eisa_id_to_string(u32 numeric_id, char *out_string); +void acpi_ex_eisa_id_to_string(char *dest, acpi_integer compressed_id); -void acpi_ex_unsigned_integer_to_string(acpi_integer value, char *out_string); +void acpi_ex_integer_to_string(char *dest, acpi_integer value); /* * exregion - default op_region handlers diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index 
897810ba0ccc..b0add85de308 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -324,26 +324,30 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, acpi_status acpi_ut_evaluate_numeric_object(char *object_name, struct acpi_namespace_node *device_node, - acpi_integer * address); + acpi_integer *value); acpi_status -acpi_ut_execute_HID(struct acpi_namespace_node *device_node, - struct acpica_device_id *hid); +acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 *status_flags); acpi_status -acpi_ut_execute_CID(struct acpi_namespace_node *device_node, - struct acpi_compatible_id_list **return_cid_list); +acpi_ut_execute_power_methods(struct acpi_namespace_node *device_node, + const char **method_names, + u8 method_count, u8 *out_values); +/* + * utids - device ID support + */ acpi_status -acpi_ut_execute_STA(struct acpi_namespace_node *device_node, - u32 * status_flags); +acpi_ut_execute_HID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id); acpi_status acpi_ut_execute_UID(struct acpi_namespace_node *device_node, - struct acpica_device_id *uid); + struct acpica_device_id **return_id); acpi_status -acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest); +acpi_ut_execute_CID(struct acpi_namespace_node *device_node, + struct acpica_device_id_list **return_cid_list); /* * utlock - reader/writer locks @@ -445,6 +449,8 @@ acpi_ut_short_divide(acpi_integer in_dividend, */ const char *acpi_ut_validate_exception(acpi_status status); +u8 acpi_ut_is_pci_root_bridge(char *id); + u8 acpi_ut_is_aml_table(struct acpi_table_header *table); acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id); diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 284a7becbe96..cf29c4953028 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -50,8 +50,6 @@ ACPI_MODULE_NAME("evrgnini") /* Local prototypes */ -static u8 acpi_ev_match_pci_root_bridge(char *id); - static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); /******************************************************************************* @@ -330,37 +328,6 @@ acpi_ev_pci_config_region_setup(acpi_handle handle, return_ACPI_STATUS(AE_OK); } -/******************************************************************************* - * - * FUNCTION: acpi_ev_match_pci_root_bridge - * - * PARAMETERS: Id - The HID/CID in string format - * - * RETURN: TRUE if the Id is a match for a PCI/PCI-Express Root Bridge - * - * DESCRIPTION: Determine if the input ID is a PCI Root Bridge ID. - * - ******************************************************************************/ - -static u8 acpi_ev_match_pci_root_bridge(char *id) -{ - - /* - * Check if this is a PCI root. - * ACPI 3.0+: check for a PCI Express root also. 
- */ - if (!(ACPI_STRNCMP(id, - PCI_ROOT_HID_STRING, - sizeof(PCI_ROOT_HID_STRING))) || - !(ACPI_STRNCMP(id, - PCI_EXPRESS_ROOT_HID_STRING, - sizeof(PCI_EXPRESS_ROOT_HID_STRING)))) { - return (TRUE); - } - - return (FALSE); -} - /******************************************************************************* * * FUNCTION: acpi_ev_is_pci_root_bridge @@ -377,9 +344,10 @@ static u8 acpi_ev_match_pci_root_bridge(char *id) static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) { acpi_status status; - struct acpica_device_id hid; - struct acpi_compatible_id_list *cid; + struct acpica_device_id *hid; + struct acpica_device_id_list *cid; u32 i; + u8 match; /* Get the _HID and check for a PCI Root Bridge */ @@ -388,7 +356,10 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) return (FALSE); } - if (acpi_ev_match_pci_root_bridge(hid.value)) { + match = acpi_ut_is_pci_root_bridge(hid->string); + ACPI_FREE(hid); + + if (match) { return (TRUE); } @@ -402,7 +373,7 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) /* Check all _CIDs in the returned list */ for (i = 0; i < cid->count; i++) { - if (acpi_ev_match_pci_root_bridge(cid->id[i].value)) { + if (acpi_ut_is_pci_root_bridge(cid->ids[i].string)) { ACPI_FREE(cid); return (TRUE); } diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 87730e944132..7d41f99f7052 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -358,50 +358,67 @@ static u32 acpi_ex_digits_needed(acpi_integer value, u32 base) * * FUNCTION: acpi_ex_eisa_id_to_string * - * PARAMETERS: numeric_id - EISA ID to be converted + * PARAMETERS: compressed_id - EISAID to be converted * out_string - Where to put the converted string (8 bytes) * * RETURN: None * - * DESCRIPTION: Convert a numeric EISA ID to string representation + * DESCRIPTION: Convert a numeric EISAID to string representation. Return + * buffer must be large enough to hold the string. The string + * returned is always exactly of length ACPI_EISAID_STRING_SIZE + * (includes null terminator). The EISAID is always 32 bits. * ******************************************************************************/ -void acpi_ex_eisa_id_to_string(u32 numeric_id, char *out_string) +void acpi_ex_eisa_id_to_string(char *out_string, acpi_integer compressed_id) { - u32 eisa_id; + u32 swapped_id; ACPI_FUNCTION_ENTRY(); + /* The EISAID should be a 32-bit integer */ + + if (compressed_id > ACPI_UINT32_MAX) { + ACPI_WARNING((AE_INFO, + "Expected EISAID is larger than 32 bits: 0x%8.8X%8.8X, truncating", + ACPI_FORMAT_UINT64(compressed_id))); + } + /* Swap ID to big-endian to get contiguous bits */ - eisa_id = acpi_ut_dword_byte_swap(numeric_id); + swapped_id = acpi_ut_dword_byte_swap((u32)compressed_id); - out_string[0] = (char)('@' + (((unsigned long)eisa_id >> 26) & 0x1f)); - out_string[1] = (char)('@' + ((eisa_id >> 21) & 0x1f)); - out_string[2] = (char)('@' + ((eisa_id >> 16) & 0x1f)); - out_string[3] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 12); - out_string[4] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 8); - out_string[5] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 4); - out_string[6] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 0); + /* First 3 bytes are uppercase letters. 
Next 4 bytes are hexadecimal */ + + out_string[0] = + (char)(0x40 + (((unsigned long)swapped_id >> 26) & 0x1F)); + out_string[1] = (char)(0x40 + ((swapped_id >> 21) & 0x1F)); + out_string[2] = (char)(0x40 + ((swapped_id >> 16) & 0x1F)); + out_string[3] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 12); + out_string[4] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 8); + out_string[5] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 4); + out_string[6] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 0); out_string[7] = 0; } /******************************************************************************* * - * FUNCTION: acpi_ex_unsigned_integer_to_string + * FUNCTION: acpi_ex_integer_to_string * - * PARAMETERS: Value - Value to be converted - * out_string - Where to put the converted string (8 bytes) + * PARAMETERS: out_string - Where to put the converted string. At least + * 21 bytes are needed to hold the largest + * possible 64-bit integer. + * Value - Value to be converted * * RETURN: None, string * - * DESCRIPTION: Convert a number to string representation. Assumes string - * buffer is large enough to hold the string. + * DESCRIPTION: Convert a 64-bit integer to decimal string representation. + * Assumes string buffer is large enough to hold the string. The + * largest string is (ACPI_MAX64_DECIMAL_DIGITS + 1). * ******************************************************************************/ -void acpi_ex_unsigned_integer_to_string(acpi_integer value, char *out_string) +void acpi_ex_integer_to_string(char *out_string, acpi_integer value) { u32 count; u32 digits_needed; diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c index 41994fe7fbb8..0fe87f1aef16 100644 --- a/drivers/acpi/acpica/nsdumpdv.c +++ b/drivers/acpi/acpica/nsdumpdv.c @@ -70,7 +70,6 @@ static acpi_status acpi_ns_dump_one_device(acpi_handle obj_handle, u32 level, void *context, void **return_value) { - struct acpi_buffer buffer; struct acpi_device_info *info; acpi_status status; u32 i; @@ -80,17 +79,15 @@ acpi_ns_dump_one_device(acpi_handle obj_handle, status = acpi_ns_dump_one_object(obj_handle, level, context, return_value); - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(obj_handle, &buffer); + status = acpi_get_object_info(obj_handle, &info); if (ACPI_SUCCESS(status)) { - info = buffer.pointer; for (i = 0; i < level; i++) { ACPI_DEBUG_PRINT_RAW((ACPI_DB_TABLES, " ")); } ACPI_DEBUG_PRINT_RAW((ACPI_DB_TABLES, " HID: %s, ADR: %8.8X%8.8X, Status: %X\n", - info->hardware_id.value, + info->hardware_id.string, ACPI_FORMAT_UINT64(info->address), info->current_status)); ACPI_FREE(info); diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index daf4ad37896d..4929dbdbc8f0 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -535,10 +535,11 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, acpi_status status; struct acpi_namespace_node *node; u32 flags; - struct acpica_device_id hid; - struct acpi_compatible_id_list *cid; + struct acpica_device_id *hid; + struct acpica_device_id_list *cid; u32 i; - int found; + u8 found; + int no_match; status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { @@ -582,10 +583,14 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, return (AE_CTRL_DEPTH); } - if (ACPI_STRNCMP(hid.value, info->hid, sizeof(hid.value)) != 0) { - - /* Get the list of Compatible IDs */ + no_match = ACPI_STRCMP(hid->string, info->hid); + ACPI_FREE(hid); + if 
(no_match) { + /* + * HID does not match, attempt match within the + * list of Compatible IDs (CIDs) + */ status = acpi_ut_execute_CID(node, &cid); if (status == AE_NOT_FOUND) { return (AE_OK); @@ -597,10 +602,8 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, found = 0; for (i = 0; i < cid->count; i++) { - if (ACPI_STRNCMP(cid->id[i].value, info->hid, - sizeof(struct - acpi_compatible_id)) == - 0) { + if (ACPI_STRCMP(cid->ids[i].string, info->hid) + == 0) { found = 1; break; } diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index f23593d6add4..ddc84af6336e 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -51,6 +51,11 @@ #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsxfname") +/* Local prototypes */ +static char *acpi_ns_copy_device_id(struct acpica_device_id *dest, + struct acpica_device_id *source, + char *string_area); + /****************************************************************************** * * FUNCTION: acpi_get_handle @@ -68,6 +73,7 @@ ACPI_MODULE_NAME("nsxfname") * namespace handle. * ******************************************************************************/ + acpi_status acpi_get_handle(acpi_handle parent, acpi_string pathname, acpi_handle * ret_handle) @@ -208,12 +214,40 @@ acpi_get_name(acpi_handle handle, u32 name_type, struct acpi_buffer * buffer) ACPI_EXPORT_SYMBOL(acpi_get_name) +/****************************************************************************** + * + * FUNCTION: acpi_ns_copy_device_id + * + * PARAMETERS: Dest - Pointer to the destination DEVICE_ID + * Source - Pointer to the source DEVICE_ID + * string_area - Pointer to where to copy the dest string + * + * RETURN: Pointer to the next string area + * + * DESCRIPTION: Copy a single DEVICE_ID, including the string data. + * + ******************************************************************************/ +static char *acpi_ns_copy_device_id(struct acpica_device_id *dest, + struct acpica_device_id *source, + char *string_area) +{ + /* Create the destination DEVICE_ID */ + + dest->string = string_area; + dest->length = source->length; + + /* Copy actual string and return a pointer to the next string area */ + + ACPI_MEMCPY(string_area, source->string, source->length); + return (string_area + source->length); +} + /****************************************************************************** * * FUNCTION: acpi_get_object_info * - * PARAMETERS: Handle - Object Handle - * Buffer - Where the info is returned + * PARAMETERS: Handle - Object Handle + * return_buffer - Where the info is returned * * RETURN: Status * @@ -221,33 +255,37 @@ ACPI_EXPORT_SYMBOL(acpi_get_name) * namespace node and possibly by running several standard * control methods (Such as in the case of a device.) * + * For Device and Processor objects, run the Device _HID, _UID, _CID, _STA, + * _ADR, _sx_w, and _sx_d methods. + * + * Note: Allocates the return buffer, must be freed by the caller. 
+ * ******************************************************************************/ + acpi_status -acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer) +acpi_get_object_info(acpi_handle handle, + struct acpi_device_info **return_buffer) { - acpi_status status; struct acpi_namespace_node *node; struct acpi_device_info *info; - struct acpi_device_info *return_info; - struct acpi_compatible_id_list *cid_list = NULL; - acpi_size size; + struct acpica_device_id_list *cid_list = NULL; + struct acpica_device_id *hid = NULL; + struct acpica_device_id *uid = NULL; + char *next_id_string; + acpi_object_type type; + acpi_name name; + u8 param_count = 0; + u8 valid = 0; + u32 info_size; + u32 i; + acpi_status status; /* Parameter validation */ - if (!handle || !buffer) { + if (!handle || !return_buffer) { return (AE_BAD_PARAMETER); } - status = acpi_ut_validate_buffer(buffer); - if (ACPI_FAILURE(status)) { - return (status); - } - - info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_device_info)); - if (!info) { - return (AE_NO_MEMORY); - } - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { goto cleanup; @@ -256,66 +294,91 @@ acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer) node = acpi_ns_map_handle_to_node(handle); if (!node) { (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); - status = AE_BAD_PARAMETER; - goto cleanup; + return (AE_BAD_PARAMETER); } - /* Init return structure */ - - size = sizeof(struct acpi_device_info); + /* Get the namespace node data while the namespace is locked */ - info->type = node->type; - info->name = node->name.integer; - info->valid = 0; + info_size = sizeof(struct acpi_device_info); + type = node->type; + name = node->name.integer; if (node->type == ACPI_TYPE_METHOD) { - info->param_count = node->object->method.param_count; + param_count = node->object->method.param_count; } status = acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { - goto cleanup; + return (status); } - /* If not a device, we are all done */ - - if (info->type == ACPI_TYPE_DEVICE) { + if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) { /* - * Get extra info for ACPI Devices objects only: - * Run the Device _HID, _UID, _CID, _STA, _ADR and _sx_d methods. + * Get extra info for ACPI Device/Processor objects only: + * Run the Device _HID, _UID, and _CID methods. * * Note: none of these methods are required, so they may or may - * not be present for this device. The Info->Valid bitfield is used - * to indicate which methods were found and ran successfully. + * not be present for this device. The Info->Valid bitfield is used + * to indicate which methods were found and run successfully. 
		 */
 
 		/* Execute the Device._HID method */
 
-		status = acpi_ut_execute_HID(node, &info->hardware_id);
+		status = acpi_ut_execute_HID(node, &hid);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_HID;
+			info_size += hid->length;
+			valid |= ACPI_VALID_HID;
 		}
 
 		/* Execute the Device._UID method */
 
-		status = acpi_ut_execute_UID(node, &info->unique_id);
+		status = acpi_ut_execute_UID(node, &uid);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_UID;
+			info_size += uid->length;
+			valid |= ACPI_VALID_UID;
 		}
 
 		/* Execute the Device._CID method */
 
 		status = acpi_ut_execute_CID(node, &cid_list);
 		if (ACPI_SUCCESS(status)) {
-			size += cid_list->size;
-			info->valid |= ACPI_VALID_CID;
+
+			/* Add size of CID strings and CID pointer array */
+
+			info_size +=
+			    (cid_list->list_size -
+			     sizeof(struct acpica_device_id_list));
+			valid |= ACPI_VALID_CID;
 		}
+	}
+
+	/*
+	 * Now that we have the variable-length data, we can allocate the
+	 * return buffer
+	 */
+	info = ACPI_ALLOCATE_ZEROED(info_size);
+	if (!info) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	/* Get the fixed-length data */
+
+	if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) {
+		/*
+		 * Get extra info for ACPI Device/Processor objects only:
+		 * Run the _STA, _ADR, _sx_w, and _sx_d methods.
+		 *
+		 * Note: none of these methods are required, so they may or may
+		 * not be present for this device. The Info->Valid bitfield is used
+		 * to indicate which methods were found and run successfully.
+		 */
 
 		/* Execute the Device._STA method */
 
 		status = acpi_ut_execute_STA(node, &info->current_status);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_STA;
+			valid |= ACPI_VALID_STA;
 		}
 
 		/* Execute the Device._ADR method */
@@ -323,36 +386,100 @@
 		status = acpi_ut_evaluate_numeric_object(METHOD_NAME__ADR,
 							 node, &info->address);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_ADR;
+			valid |= ACPI_VALID_ADR;
+		}
+
+		/* Execute the Device._sx_w methods */
+
+		status = acpi_ut_execute_power_methods(node,
+						       acpi_gbl_lowest_dstate_names,
+						       ACPI_NUM_sx_w_METHODS,
+						       info->lowest_dstates);
+		if (ACPI_SUCCESS(status)) {
+			valid |= ACPI_VALID_SXWS;
 		}
 
 		/* Execute the Device._sx_d methods */
 
-		status = acpi_ut_execute_sxds(node, info->highest_dstates);
+		status = acpi_ut_execute_power_methods(node,
+						       acpi_gbl_highest_dstate_names,
+						       ACPI_NUM_sx_d_METHODS,
+						       info->highest_dstates);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_SXDS;
+			valid |= ACPI_VALID_SXDS;
 		}
 	}
 
-	/* Validate/Allocate/Clear caller buffer */
+	/*
+	 * Create a pointer to the string area of the return buffer.
+	 * Point to the end of the base struct acpi_device_info structure.
+	 */
+	next_id_string = ACPI_CAST_PTR(char, info->compatible_id_list.ids);
+	if (cid_list) {
 
-	status = acpi_ut_initialize_buffer(buffer, size);
-	if (ACPI_FAILURE(status)) {
-		goto cleanup;
+		/* Point past the CID DEVICE_ID array */
+
+		next_id_string +=
+		    ((acpi_size) cid_list->count *
+		     sizeof(struct acpica_device_id));
 	}
 
-	/* Populate the return buffer */
+	/*
+	 * Copy the HID, UID, and CIDs to the return buffer. The variable-length
+	 * strings are copied to the reserved area at the end of the buffer.
+	 *
+	 * For HID and CID, check if the ID is a PCI Root Bridge.
+	 */
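+	/*
+	 * Buffer layout at this point (single allocation of info_size):
+	 *
+	 *    struct acpi_device_info     - fixed-length header
+	 *    compatible_id_list.ids[]    - cid_list->count DEVICE_ID entries
+	 *    string area                 - HID, UID, and CID strings
+	 *
+	 * next_id_string points to the string area; each copy below appends
+	 * one null-terminated ID string and advances the pointer.
+	 */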
+	if (hid) {
+		next_id_string = acpi_ns_copy_device_id(&info->hardware_id,
+							hid, next_id_string);
+
+		if (acpi_ut_is_pci_root_bridge(hid->string)) {
+			info->flags |= ACPI_PCI_ROOT_BRIDGE;
+		}
+	}
 
-	return_info = buffer->pointer;
-	ACPI_MEMCPY(return_info, info, sizeof(struct acpi_device_info));
+	if (uid) {
+		next_id_string = acpi_ns_copy_device_id(&info->unique_id,
+							uid, next_id_string);
+	}
 
 	if (cid_list) {
-		ACPI_MEMCPY(&return_info->compatibility_id, cid_list,
-			    cid_list->size);
+		info->compatible_id_list.count = cid_list->count;
+		info->compatible_id_list.list_size = cid_list->list_size;
+
+		/* Copy each CID */
+
+		for (i = 0; i < cid_list->count; i++) {
+			next_id_string =
+			    acpi_ns_copy_device_id(&info->compatible_id_list.
+						   ids[i], &cid_list->ids[i],
+						   next_id_string);
+
+			if (acpi_ut_is_pci_root_bridge(cid_list->ids[i].string)) {
+				info->flags |= ACPI_PCI_ROOT_BRIDGE;
+			}
+		}
 	}
 
+	/* Copy the fixed-length data */
+
+	info->info_size = info_size;
+	info->type = type;
+	info->name = name;
+	info->param_count = param_count;
+	info->valid = valid;
+
+	*return_buffer = info;
+	status = AE_OK;
+
 cleanup:
-	ACPI_FREE(info);
+	if (hid) {
+		ACPI_FREE(hid);
+	}
+	if (uid) {
+		ACPI_FREE(uid);
+	}
 	if (cid_list) {
 		ACPI_FREE(cid_list);
 	}
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index 006b16c26017..5503307b8bb7 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -44,19 +44,10 @@
 #include <acpi/acpi.h>
 #include "accommon.h"
 #include "acnamesp.h"
-#include "acinterp.h"
 
 #define _COMPONENT          ACPI_UTILITIES
 ACPI_MODULE_NAME("uteval")
 
-/* Local prototypes */
-static void
-acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length);
-
-static acpi_status
-acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc,
-			  struct acpi_compatible_id *one_cid);
-
 /*
 * Strings supported by the _OSI predefined (internal) method.
 *
@@ -213,7 +204,7 @@ acpi_status acpi_osi_invalidate(char *interface)
 * RETURN:      Status
 *
 * DESCRIPTION: Evaluates a namespace object and verifies the type of the
- *              return object. Common code that simplifies accessing objects
+ *              return object.  Common code that simplifies accessing objects
 *              that have required return objects of fixed types.
 *
 * NOTE: Internal function, no parameter validation
@@ -298,7 +289,7 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
 	if ((acpi_gbl_enable_interpreter_slack) && (!expected_return_btypes)) {
 		/*
-		 * We received a return object, but one was not expected. This can
+		 * We received a return object, but one was not expected.  This can
 		 * happen frequently if the "implicit return" feature is enabled.
 		 * Just delete the return object and return AE_OK.
 		 */
@@ -340,12 +331,12 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
 *
 * PARAMETERS:  object_name         - Object name to be evaluated
 *              device_node         - Node for the device
- *              Address             - Where the value is returned
+ *              Value               - Where the value is returned
 *
 * RETURN:      Status
 *
 * DESCRIPTION: Evaluates a numeric namespace object for a selected device
- *              and stores result in *Address.
+ *              and stores result in *Value.
* * NOTE: Internal function, no parameter validation * @@ -354,7 +345,7 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, acpi_status acpi_ut_evaluate_numeric_object(char *object_name, struct acpi_namespace_node *device_node, - acpi_integer * address) + acpi_integer *value) { union acpi_operand_object *obj_desc; acpi_status status; @@ -369,295 +360,7 @@ acpi_ut_evaluate_numeric_object(char *object_name, /* Get the returned Integer */ - *address = obj_desc->integer.value; - - /* On exit, we must delete the return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_copy_id_string - * - * PARAMETERS: Destination - Where to copy the string - * Source - Source string - * max_length - Length of the destination buffer - * - * RETURN: None - * - * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods. - * Performs removal of a leading asterisk if present -- workaround - * for a known issue on a bunch of machines. - * - ******************************************************************************/ - -static void -acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length) -{ - - /* - * Workaround for ID strings that have a leading asterisk. This construct - * is not allowed by the ACPI specification (ID strings must be - * alphanumeric), but enough existing machines have this embedded in their - * ID strings that the following code is useful. - */ - if (*source == '*') { - source++; - } - - /* Do the actual copy */ - - ACPI_STRNCPY(destination, source, max_length); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_HID - * - * PARAMETERS: device_node - Node for the device - * Hid - Where the HID is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _HID control method that returns the hardware - * ID of the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_HID(struct acpi_namespace_node *device_node, - struct acpica_device_id *hid) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - - ACPI_FUNCTION_TRACE(ut_execute_HID); - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, - &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - if (obj_desc->common.type == ACPI_TYPE_INTEGER) { - - /* Convert the Numeric HID to string */ - - acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value, - hid->value); - } else { - /* Copy the String HID from the returned object */ - - acpi_ut_copy_id_string(hid->value, obj_desc->string.pointer, - sizeof(hid->value)); - } - - /* On exit, we must delete the return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_translate_one_cid - * - * PARAMETERS: obj_desc - _CID object, must be integer or string - * one_cid - Where the CID string is returned - * - * RETURN: Status - * - * DESCRIPTION: Return a numeric or string _CID value as a string. - * (Compatible ID) - * - * NOTE: Assumes a maximum _CID string length of - * ACPI_MAX_CID_LENGTH. 
- * - ******************************************************************************/ - -static acpi_status -acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc, - struct acpi_compatible_id *one_cid) -{ - - switch (obj_desc->common.type) { - case ACPI_TYPE_INTEGER: - - /* Convert the Numeric CID to string */ - - acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value, - one_cid->value); - return (AE_OK); - - case ACPI_TYPE_STRING: - - if (obj_desc->string.length > ACPI_MAX_CID_LENGTH) { - return (AE_AML_STRING_LIMIT); - } - - /* Copy the String CID from the returned object */ - - acpi_ut_copy_id_string(one_cid->value, obj_desc->string.pointer, - ACPI_MAX_CID_LENGTH); - return (AE_OK); - - default: - - return (AE_TYPE); - } -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_CID - * - * PARAMETERS: device_node - Node for the device - * return_cid_list - Where the CID list is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _CID control method that returns one or more - * compatible hardware IDs for the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_CID(struct acpi_namespace_node * device_node, - struct acpi_compatible_id_list ** return_cid_list) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - u32 count; - u32 size; - struct acpi_compatible_id_list *cid_list; - u32 i; - - ACPI_FUNCTION_TRACE(ut_execute_CID); - - /* Evaluate the _CID method for this device */ - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING - | ACPI_BTYPE_PACKAGE, &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Get the number of _CIDs returned */ - - count = 1; - if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { - count = obj_desc->package.count; - } - - /* Allocate a worst-case buffer for the _CIDs */ - - size = (((count - 1) * sizeof(struct acpi_compatible_id)) + - sizeof(struct acpi_compatible_id_list)); - - cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size); - if (!cid_list) { - return_ACPI_STATUS(AE_NO_MEMORY); - } - - /* Init CID list */ - - cid_list->count = count; - cid_list->size = size; - - /* - * A _CID can return either a single compatible ID or a package of - * compatible IDs. Each compatible ID can be one of the following: - * 1) Integer (32 bit compressed EISA ID) or - * 2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss") - */ - - /* The _CID object can be either a single CID or a package (list) of CIDs */ - - if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { - - /* Translate each package element */ - - for (i = 0; i < count; i++) { - status = - acpi_ut_translate_one_cid(obj_desc->package. 
- elements[i], - &cid_list->id[i]); - if (ACPI_FAILURE(status)) { - break; - } - } - } else { - /* Only one CID, translate to a string */ - - status = acpi_ut_translate_one_cid(obj_desc, cid_list->id); - } - - /* Cleanup on error */ - - if (ACPI_FAILURE(status)) { - ACPI_FREE(cid_list); - } else { - *return_cid_list = cid_list; - } - - /* On exit, we must delete the _CID return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_UID - * - * PARAMETERS: device_node - Node for the device - * Uid - Where the UID is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _UID control method that returns the hardware - * ID of the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_UID(struct acpi_namespace_node *device_node, - struct acpica_device_id *uid) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - - ACPI_FUNCTION_TRACE(ut_execute_UID); - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, - &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - if (obj_desc->common.type == ACPI_TYPE_INTEGER) { - - /* Convert the Numeric UID to string */ - - acpi_ex_unsigned_integer_to_string(obj_desc->integer.value, - uid->value); - } else { - /* Copy the String UID from the returned object */ - - acpi_ut_copy_id_string(uid->value, obj_desc->string.pointer, - sizeof(uid->value)); - } + *value = obj_desc->integer.value; /* On exit, we must delete the return object */ @@ -716,60 +419,64 @@ acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 * flags) /******************************************************************************* * - * FUNCTION: acpi_ut_execute_Sxds + * FUNCTION: acpi_ut_execute_power_methods * * PARAMETERS: device_node - Node for the device - * Flags - Where the status flags are returned + * method_names - Array of power method names + * method_count - Number of methods to execute + * out_values - Where the power method values are returned * - * RETURN: Status + * RETURN: Status, out_values * - * DESCRIPTION: Executes _STA for selected device and stores results in - * *Flags. + * DESCRIPTION: Executes the specified power methods for the device and returns + * the result(s). * * NOTE: Internal function, no parameter validation * - ******************************************************************************/ +******************************************************************************/ acpi_status -acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest) +acpi_ut_execute_power_methods(struct acpi_namespace_node *device_node, + const char **method_names, + u8 method_count, u8 *out_values) { union acpi_operand_object *obj_desc; acpi_status status; + acpi_status final_status = AE_NOT_FOUND; u32 i; - ACPI_FUNCTION_TRACE(ut_execute_sxds); + ACPI_FUNCTION_TRACE(ut_execute_power_methods); - for (i = 0; i < 4; i++) { - highest[i] = 0xFF; + for (i = 0; i < method_count; i++) { + /* + * Execute the power method (_sx_d or _sx_w). The only allowable + * return type is an Integer. 
+ */ status = acpi_ut_evaluate_object(device_node, ACPI_CAST_PTR(char, - acpi_gbl_highest_dstate_names - [i]), + method_names[i]), ACPI_BTYPE_INTEGER, &obj_desc); - if (ACPI_FAILURE(status)) { - if (status != AE_NOT_FOUND) { - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "%s on Device %4.4s, %s\n", - ACPI_CAST_PTR(char, - acpi_gbl_highest_dstate_names - [i]), - acpi_ut_get_node_name - (device_node), - acpi_format_exception - (status))); - - return_ACPI_STATUS(status); - } - } else { - /* Extract the Dstate value */ - - highest[i] = (u8) obj_desc->integer.value; + if (ACPI_SUCCESS(status)) { + out_values[i] = (u8)obj_desc->integer.value; /* Delete the return object */ acpi_ut_remove_reference(obj_desc); + final_status = AE_OK; /* At least one value is valid */ + continue; } + + out_values[i] = ACPI_UINT8_MAX; + if (status == AE_NOT_FOUND) { + continue; /* Ignore if not found */ + } + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Failed %s on Device %4.4s, %s\n", + ACPI_CAST_PTR(char, method_names[i]), + acpi_ut_get_node_name(device_node), + acpi_format_exception(status))); } - return_ACPI_STATUS(AE_OK); + return_ACPI_STATUS(final_status); } diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c index 59e46f257c02..ed7a33c67fbe 100644 --- a/drivers/acpi/acpica/utglobal.c +++ b/drivers/acpi/acpica/utglobal.c @@ -90,7 +90,15 @@ const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT] = { "\\_S5_" }; -const char *acpi_gbl_highest_dstate_names[4] = { +const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS] = { + "_S0W", + "_S1W", + "_S2W", + "_S3W", + "_S4W" +}; + +const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS] = { "_S1D", "_S2D", "_S3D", diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c new file mode 100644 index 000000000000..52eaae404554 --- /dev/null +++ b/drivers/acpi/acpica/utids.c @@ -0,0 +1,382 @@ +/****************************************************************************** + * + * Module Name: utids - support for device IDs - HID, UID, CID + * + *****************************************************************************/ + +/* + * Copyright (C) 2000 - 2009, Intel Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acinterp.h"
+
+#define _COMPONENT          ACPI_UTILITIES
+ACPI_MODULE_NAME("utids")
+
+/* Local prototypes */
+static void acpi_ut_copy_id_string(char *destination, char *source);
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_copy_id_string
+ *
+ * PARAMETERS:  Destination         - Where to copy the string
+ *              Source              - Source string
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods.
+ *              Performs removal of a leading asterisk if present -- workaround
+ *              for a known issue on a bunch of machines.
+ *
+ ******************************************************************************/
+
+static void acpi_ut_copy_id_string(char *destination, char *source)
+{
+
+	/*
+	 * Workaround for ID strings that have a leading asterisk. This construct
+	 * is not allowed by the ACPI specification (ID strings must be
+	 * alphanumeric), but enough existing machines have this embedded in their
+	 * ID strings that the following code is useful.
+	 */
+	if (*source == '*') {
+		source++;
+	}
+
+	/* Do the actual copy */
+
+	ACPI_STRCPY(destination, source);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_execute_HID
+ *
+ * PARAMETERS:  device_node         - Node for the device
+ *              return_id           - Where the string HID is returned
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Executes the _HID control method that returns the hardware
+ *              ID of the device. The HID is either a 32-bit encoded EISAID
+ *              Integer or a String. A string is always returned. An EISAID
+ *              is converted to a string.
+ * + * NOTE: Internal function, no parameter validation + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_HID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + struct acpica_device_id *hid; + u32 length; + acpi_status status; + + ACPI_FUNCTION_TRACE(ut_execute_HID); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, + &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + length = ACPI_EISAID_STRING_SIZE; + } else { + length = obj_desc->string.length + 1; + } + + /* Allocate a buffer for the HID */ + + hid = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpica_device_id) + + (acpi_size) length); + if (!hid) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after DEVICE_ID struct */ + + hid->string = ACPI_ADD_PTR(char, hid, sizeof(struct acpica_device_id)); + + /* Convert EISAID to a string or simply copy existing string */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + acpi_ex_eisa_id_to_string(hid->string, obj_desc->integer.value); + } else { + acpi_ut_copy_id_string(hid->string, obj_desc->string.pointer); + } + + hid->length = length; + *return_id = hid; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_UID + * + * PARAMETERS: device_node - Node for the device + * return_id - Where the string UID is returned + * + * RETURN: Status + * + * DESCRIPTION: Executes the _UID control method that returns the unique + * ID of the device. The UID is either a 64-bit Integer (NOT an + * EISAID) or a string. Always returns a string. A 64-bit integer + * is converted to a decimal string. 
+ * + * NOTE: Internal function, no parameter validation + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_UID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + struct acpica_device_id *uid; + u32 length; + acpi_status status; + + ACPI_FUNCTION_TRACE(ut_execute_UID); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, + &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + length = ACPI_MAX64_DECIMAL_DIGITS + 1; + } else { + length = obj_desc->string.length + 1; + } + + /* Allocate a buffer for the UID */ + + uid = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpica_device_id) + + (acpi_size) length); + if (!uid) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after DEVICE_ID struct */ + + uid->string = ACPI_ADD_PTR(char, uid, sizeof(struct acpica_device_id)); + + /* Convert an Integer to string, or just copy an existing string */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + acpi_ex_integer_to_string(uid->string, obj_desc->integer.value); + } else { + acpi_ut_copy_id_string(uid->string, obj_desc->string.pointer); + } + + uid->length = length; + *return_id = uid; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_CID + * + * PARAMETERS: device_node - Node for the device + * return_cid_list - Where the CID list is returned + * + * RETURN: Status, list of CID strings + * + * DESCRIPTION: Executes the _CID control method that returns one or more + * compatible hardware IDs for the device. + * + * NOTE: Internal function, no parameter validation + * + * A _CID method can return either a single compatible ID or a package of + * compatible IDs. Each compatible ID can be one of the following: + * 1) Integer (32 bit compressed EISA ID) or + * 2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss") + * + * The Integer CIDs are converted to string format by this function. + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_CID(struct acpi_namespace_node *device_node, + struct acpica_device_id_list **return_cid_list) +{ + union acpi_operand_object **cid_objects; + union acpi_operand_object *obj_desc; + struct acpica_device_id_list *cid_list; + char *next_id_string; + u32 string_area_size; + u32 length; + u32 cid_list_size; + acpi_status status; + u32 count; + u32 i; + + ACPI_FUNCTION_TRACE(ut_execute_CID); + + /* Evaluate the _CID method for this device */ + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING + | ACPI_BTYPE_PACKAGE, &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* + * Get the count and size of the returned _CIDs. _CID can return either + * a Package of Integers/Strings or a single Integer or String. + * Note: This section also validates that all CID elements are of the + * correct type (Integer or String). 
+ */ + if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { + count = obj_desc->package.count; + cid_objects = obj_desc->package.elements; + } else { /* Single Integer or String CID */ + + count = 1; + cid_objects = &obj_desc; + } + + string_area_size = 0; + for (i = 0; i < count; i++) { + + /* String lengths include null terminator */ + + switch (cid_objects[i]->common.type) { + case ACPI_TYPE_INTEGER: + string_area_size += ACPI_EISAID_STRING_SIZE; + break; + + case ACPI_TYPE_STRING: + string_area_size += cid_objects[i]->string.length + 1; + break; + + default: + status = AE_TYPE; + goto cleanup; + } + } + + /* + * Now that we know the length of the CIDs, allocate return buffer: + * 1) Size of the base structure + + * 2) Size of the CID DEVICE_ID array + + * 3) Size of the actual CID strings + */ + cid_list_size = sizeof(struct acpica_device_id_list) + + ((count - 1) * sizeof(struct acpica_device_id)) + string_area_size; + + cid_list = ACPI_ALLOCATE_ZEROED(cid_list_size); + if (!cid_list) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for CID strings starts after the CID DEVICE_ID array */ + + next_id_string = ACPI_CAST_PTR(char, cid_list->ids) + + ((acpi_size) count * sizeof(struct acpica_device_id)); + + /* Copy/convert the CIDs to the return buffer */ + + for (i = 0; i < count; i++) { + if (cid_objects[i]->common.type == ACPI_TYPE_INTEGER) { + + /* Convert the Integer (EISAID) CID to a string */ + + acpi_ex_eisa_id_to_string(next_id_string, + cid_objects[i]->integer. + value); + length = ACPI_EISAID_STRING_SIZE; + } else { /* ACPI_TYPE_STRING */ + + /* Copy the String CID from the returned object */ + + acpi_ut_copy_id_string(next_id_string, + cid_objects[i]->string.pointer); + length = cid_objects[i]->string.length + 1; + } + + cid_list->ids[i].string = next_id_string; + cid_list->ids[i].length = length; + next_id_string += length; + } + + /* Finish the CID list */ + + cid_list->count = count; + cid_list->list_size = cid_list_size; + *return_cid_list = cid_list; + +cleanup: + + /* On exit, we must delete the _CID return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index fbe782348b0b..9cd65334ca75 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -118,6 +118,34 @@ const char *acpi_ut_validate_exception(acpi_status status) return (ACPI_CAST_PTR(const char, exception)); } +/******************************************************************************* + * + * FUNCTION: acpi_ut_is_pci_root_bridge + * + * PARAMETERS: Id - The HID/CID in string format + * + * RETURN: TRUE if the Id is a match for a PCI/PCI-Express Root Bridge + * + * DESCRIPTION: Determine if the input ID is a PCI Root Bridge ID. + * + ******************************************************************************/ + +u8 acpi_ut_is_pci_root_bridge(char *id) +{ + + /* + * Check if this is a PCI root bridge. + * ACPI 3.0+: check for a PCI Express root also. 
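    /*
     * [ Illustration, assumed consumer: acpi_get_object_info() presumably
     *   applies this predicate to the _HID/_CID strings to set
     *   ACPI_PCI_ROOT_BRIDGE (defined later in this patch) in
     *   acpi_device_info.flags, roughly:
     *
     *      if ((info->valid & ACPI_VALID_HID) &&
     *          acpi_ut_is_pci_root_bridge(info->hardware_id.string)) {
     *              info->flags |= ACPI_PCI_ROOT_BRIDGE;
     *      }
     * ]
     */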
+ */ + if (!(ACPI_STRCMP(id, + PCI_ROOT_HID_STRING)) || + !(ACPI_STRCMP(id, PCI_EXPRESS_ROOT_HID_STRING))) { + return (TRUE); + } + + return (FALSE); +} + /******************************************************************************* * * FUNCTION: acpi_ut_is_aml_table diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index fe0cdf83641a..2aee8c24dc56 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -200,20 +200,17 @@ container_walk_namespace_cb(acpi_handle handle, u32 lvl, void *context, void **rv) { char *hid = NULL; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_device_info *info; acpi_status status; int *action = context; - - status = acpi_get_object_info(handle, &buffer); - if (ACPI_FAILURE(status) || !buffer.pointer) { + status = acpi_get_object_info(handle, &info); + if (ACPI_FAILURE(status)) { return AE_OK; } - info = buffer.pointer; if (info->valid & ACPI_VALID_HID) - hid = info->hardware_id.value; + hid = info->hardware_id.string; if (hid == NULL) { goto end; @@ -240,7 +237,7 @@ container_walk_namespace_cb(acpi_handle handle, } end: - kfree(buffer.pointer); + kfree(info); return AE_OK; } diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index efb959d6c8a9..39536b80bce7 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -231,18 +231,16 @@ static int is_ata(acpi_handle handle) static int is_battery(acpi_handle handle) { struct acpi_device_info *info; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; int ret = 1; - if (!ACPI_SUCCESS(acpi_get_object_info(handle, &buffer))) + if (!ACPI_SUCCESS(acpi_get_object_info(handle, &info))) return 0; - info = buffer.pointer; if (!(info->valid & ACPI_VALID_HID)) ret = 0; else - ret = !strcmp("PNP0C0A", info->hardware_id.value); + ret = !strcmp("PNP0C0A", info->hardware_id.string); - kfree(buffer.pointer); + kfree(info); return ret; } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index a8a5c29958c8..27a7072347ea 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -93,15 +93,13 @@ do_acpi_find_child(acpi_handle handle, u32 lvl, void *context, void **rv) { acpi_status status; struct acpi_device_info *info; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_find_child *find = context; - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_SUCCESS(status)) { - info = buffer.pointer; if (info->address == find->address) find->handle = handle; - kfree(buffer.pointer); + kfree(info); } return AE_OK; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 781435d7e369..0ab526de7c55 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -60,13 +60,13 @@ static int create_modalias(struct acpi_device *acpi_dev, char *modalias, } if (acpi_dev->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list; + struct acpica_device_id_list *cid_list; int i; cid_list = acpi_dev->pnp.cid_list; for (i = 0; i < cid_list->count; i++) { count = snprintf(&modalias[len], size, "%s:", - cid_list->id[i].value); + cid_list->ids[i].string); if (count < 0 || count >= size) { printk(KERN_ERR PREFIX "%s cid[%i] exceeds event buffer size", acpi_dev->pnp.device_name, i); @@ -287,14 +287,14 @@ int acpi_match_device_ids(struct acpi_device *device, } if (device->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list = device->pnp.cid_list; + struct acpica_device_id_list *cid_list = device->pnp.cid_list; int i; for (id = ids; id->id[0]; id++) { /* compare multiple _CID 
entries against driver ids */ for (i = 0; i < cid_list->count; i++) { if (!strcmp((char*)id->id, - cid_list->id[i].value)) + cid_list->ids[i].string)) return 0; } } @@ -999,33 +999,89 @@ static int acpi_dock_match(struct acpi_device *device) return acpi_get_handle(device->handle, "_DCK", &tmp); } +static struct acpica_device_id_list* +acpi_add_cid( + struct acpi_device_info *info, + struct acpica_device_id *new_cid) +{ + struct acpica_device_id_list *cid; + char *next_id_string; + acpi_size cid_length; + acpi_size new_cid_length; + u32 i; + + + /* Allocate new CID list with room for the new CID */ + + if (!new_cid) + new_cid_length = info->compatible_id_list.list_size; + else if (info->compatible_id_list.list_size) + new_cid_length = info->compatible_id_list.list_size + + new_cid->length + sizeof(struct acpica_device_id); + else + new_cid_length = sizeof(struct acpica_device_id_list) + new_cid->length; + + cid = ACPI_ALLOCATE_ZEROED(new_cid_length); + if (!cid) { + return NULL; + } + + cid->list_size = new_cid_length; + cid->count = info->compatible_id_list.count; + if (new_cid) + cid->count++; + next_id_string = (char *) cid->ids + (cid->count * sizeof(struct acpica_device_id)); + + /* Copy all existing CIDs */ + + for (i = 0; i < info->compatible_id_list.count; i++) { + cid_length = info->compatible_id_list.ids[i].length; + cid->ids[i].string = next_id_string; + cid->ids[i].length = cid_length; + + ACPI_MEMCPY(next_id_string, info->compatible_id_list.ids[i].string, + cid_length); + + next_id_string += cid_length; + } + + /* Append the new CID */ + + if (new_cid) { + cid->ids[i].string = next_id_string; + cid->ids[i].length = new_cid->length; + + ACPI_MEMCPY(next_id_string, new_cid->string, new_cid->length); + } + + return cid; +} + static void acpi_device_set_id(struct acpi_device *device, struct acpi_device *parent, acpi_handle handle, int type) { - struct acpi_device_info *info; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_device_info *info = NULL; char *hid = NULL; char *uid = NULL; - struct acpi_compatible_id_list *cid_list = NULL; - const char *cid_add = NULL; + struct acpica_device_id_list *cid_list = NULL; + char *cid_add = NULL; acpi_status status; switch (type) { case ACPI_BUS_TYPE_DEVICE: - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "%s: Error reading device info\n", __func__); return; } - info = buffer.pointer; if (info->valid & ACPI_VALID_HID) - hid = info->hardware_id.value; + hid = info->hardware_id.string; if (info->valid & ACPI_VALID_UID) - uid = info->unique_id.value; + uid = info->unique_id.string; if (info->valid & ACPI_VALID_CID) - cid_list = &info->compatibility_id; + cid_list = &info->compatible_id_list; if (info->valid & ACPI_VALID_ADR) { device->pnp.bus_address = info->address; device->flags.bus_address = 1; @@ -1076,55 +1132,44 @@ static void acpi_device_set_id(struct acpi_device *device, } if (hid) { - strcpy(device->pnp.hardware_id, hid); - device->flags.hardware_id = 1; - } + device->pnp.hardware_id = ACPI_ALLOCATE_ZEROED(strlen (hid) + 1); + if (device->pnp.hardware_id) { + strcpy(device->pnp.hardware_id, hid); + device->flags.hardware_id = 1; + } + } else + device->pnp.hardware_id = NULL; + if (uid) { - strcpy(device->pnp.unique_id, uid); - device->flags.unique_id = 1; - } + device->pnp.unique_id = ACPI_ALLOCATE_ZEROED(strlen (uid) + 1); + if (device->pnp.unique_id) { + strcpy(device->pnp.unique_id, uid); + 
device->flags.unique_id = 1; + } + } else + device->pnp.unique_id = NULL; + if (cid_list || cid_add) { - struct acpi_compatible_id_list *list; - int size = 0; - int count = 0; - - if (cid_list) { - size = cid_list->size; - } else if (cid_add) { - size = sizeof(struct acpi_compatible_id_list); - cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size); - if (!cid_list) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(buffer.pointer); - return; - } else { - cid_list->count = 0; - cid_list->size = size; - } + struct acpica_device_id_list *list; + + if (cid_add) { + struct acpica_device_id cid; + cid.length = strlen (cid_add) + 1; + cid.string = cid_add; + + list = acpi_add_cid(info, &cid); + } else { + list = acpi_add_cid(info, NULL); } - if (cid_add) - size += sizeof(struct acpi_compatible_id); - list = kmalloc(size, GFP_KERNEL); if (list) { - if (cid_list) { - memcpy(list, cid_list, cid_list->size); - count = cid_list->count; - } - if (cid_add) { - strncpy(list->id[count].value, cid_add, - ACPI_MAX_CID_LENGTH); - count++; - device->flags.compatible_ids = 1; - } - list->size = size; - list->count = count; device->pnp.cid_list = list; - } else - printk(KERN_ERR PREFIX "Memory allocation error\n"); + if (cid_add) + device->flags.compatible_ids = 1; + } } - kfree(buffer.pointer); + kfree(info); } static int acpi_device_set_context(struct acpi_device *device, int type) diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c index 8f3d4c184914..7bead4c816ca 100644 --- a/drivers/char/agp/hp-agp.c +++ b/drivers/char/agp/hp-agp.c @@ -478,7 +478,6 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret) { acpi_handle handle, parent; acpi_status status; - struct acpi_buffer buffer; struct acpi_device_info *info; u64 lba_hpa, sba_hpa, length; int match; @@ -490,13 +489,11 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret) /* Look for an enclosing IOC scope and find its CSR space */ handle = obj; do { - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_SUCCESS(status)) { /* TBD check _CID also */ - info = buffer.pointer; - info->hardware_id.value[sizeof(info->hardware_id)-1] = '\0'; - match = (strcmp(info->hardware_id.value, "HWP0001") == 0); + info->hardware_id.string[sizeof(info->hardware_id.length)-1] = '\0'; + match = (strcmp(info->hardware_id.string, "HWP0001") == 0); kfree(info); if (match) { status = hp_acpi_csr_space(handle, &sba_hpa, &length); diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index c509c9916464..c0cf45a11b93 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -114,8 +114,6 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle, unsigned int bus, devnum, func; acpi_integer addr; acpi_handle dev_handle; - struct acpi_buffer buffer = {.length = ACPI_ALLOCATE_BUFFER, - .pointer = NULL}; acpi_status status; struct acpi_device_info *dinfo = NULL; int ret = -ENODEV; @@ -134,12 +132,11 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle, goto err; } - status = acpi_get_object_info(dev_handle, &buffer); + status = acpi_get_object_info(dev_handle, &dinfo); if (ACPI_FAILURE(status)) { DEBPRINT("get_object_info for device failed\n"); goto err; } - dinfo = buffer.pointer; if (dinfo && (dinfo->valid & ACPI_VALID_ADR) && dinfo->address == addr) { *pcidevfn = addr; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 
5befa7e379b7..a9d926b7d805 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -398,23 +398,21 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle, acpi_handle *phandle = (acpi_handle *)context; acpi_status status; struct acpi_device_info *info; - struct acpi_buffer info_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; int retval = 0; - status = acpi_get_object_info(handle, &info_buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) { err("%s: Failed to get device information status=0x%x\n", __func__, status); return retval; } - info = info_buffer.pointer; - info->hardware_id.value[sizeof(info->hardware_id.value) - 1] = '\0'; + info->hardware_id.string[sizeof(info->hardware_id.length) - 1] = '\0'; if (info->current_status && (info->valid & ACPI_VALID_HID) && - (!strcmp(info->hardware_id.value, IBM_HARDWARE_ID1) || - !strcmp(info->hardware_id.value, IBM_HARDWARE_ID2))) { + (!strcmp(info->hardware_id.string, IBM_HARDWARE_ID1) || + !strcmp(info->hardware_id.string, IBM_HARDWARE_ID2))) { dbg("found hardware: %s, handle: %p\n", - info->hardware_id.value, handle); + info->hardware_id.string, handle); *phandle = handle; /* returning non-zero causes the search to stop * and returns this value to the caller of diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index dafaa4a92df5..f9f68e0e7344 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -976,15 +976,12 @@ static acpi_status sony_walk_callback(acpi_handle handle, u32 level, void *context, void **return_value) { struct acpi_device_info *info; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - - if (ACPI_SUCCESS(acpi_get_object_info(handle, &buffer))) { - info = buffer.pointer; + if (ACPI_SUCCESS(acpi_get_object_info(handle, &info))) { printk(KERN_WARNING DRV_PFX "method: name: %4.4s, args %X\n", (char *)&info->name, info->param_count); - kfree(buffer.pointer); + kfree(info); } return AE_OK; diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 9496494f340e..c07fdb94d665 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -194,13 +194,13 @@ static int __init pnpacpi_add_device(struct acpi_device *device) pnpacpi_parse_resource_option_data(dev); if (device->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list = device->pnp.cid_list; + struct acpica_device_id_list *cid_list = device->pnp.cid_list; int i; for (i = 0; i < cid_list->count; i++) { - if (!ispnpidacpi(cid_list->id[i].value)) + if (!ispnpidacpi(cid_list->ids[i].string)) continue; - pnp_add_id(dev, cid_list->id[i].value); + pnp_add_id(dev, cid_list->ids[i].string); } } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..b91420b52c6f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -173,17 +173,15 @@ struct acpi_device_dir { typedef char acpi_bus_id[8]; typedef unsigned long acpi_bus_address; -typedef char acpi_hardware_id[15]; -typedef char acpi_unique_id[9]; typedef char acpi_device_name[40]; typedef char acpi_device_class[20]; struct acpi_device_pnp { acpi_bus_id bus_id; /* Object name */ acpi_bus_address bus_address; /* _ADR */ - acpi_hardware_id hardware_id; /* _HID */ - struct acpi_compatible_id_list *cid_list; /* _CIDs */ - acpi_unique_id unique_id; /* _UID */ + char *hardware_id; /* _HID */ + struct acpica_device_id_list *cid_list; /* _CIDs */ + char *unique_id; /* _UID */ acpi_device_name device_name; /* 
Driver-determined */ acpi_device_class device_class; /* " */ }; diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index b450a195319a..04904c7f1aa1 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -200,7 +200,8 @@ acpi_evaluate_object_typed(acpi_handle object, acpi_object_type return_type); acpi_status -acpi_get_object_info(acpi_handle handle, struct acpi_buffer *return_buffer); +acpi_get_object_info(acpi_handle handle, + struct acpi_device_info **return_buffer); acpi_status acpi_install_method(u8 *buffer); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 37ba576d06e8..7a4ff79e238c 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -338,7 +338,7 @@ typedef u32 acpi_physical_address; /* PM Timer ticks per second (HZ) */ -#define PM_TIMER_FREQUENCY 3579545 +#define PM_TIMER_FREQUENCY 3579545 /******************************************************************************* * @@ -969,38 +969,60 @@ acpi_status(*acpi_walk_callback) (acpi_handle obj_handle, #define ACPI_INTERRUPT_NOT_HANDLED 0x00 #define ACPI_INTERRUPT_HANDLED 0x01 -/* Length of _HID, _UID, _CID, and UUID values */ +/* Length of 32-bit EISAID values when converted back to a string */ + +#define ACPI_EISAID_STRING_SIZE 8 /* Includes null terminator */ + +/* Length of UUID (string) values */ -#define ACPI_DEVICE_ID_LENGTH 0x09 -#define ACPI_MAX_CID_LENGTH 48 #define ACPI_UUID_LENGTH 16 -/* Common string version of device HIDs and UIDs */ +/* Structures used for device/processor HID, UID, CID */ struct acpica_device_id { - char value[ACPI_DEVICE_ID_LENGTH]; + u32 length; /* Length of string + null */ + char *string; }; -/* Common string version of device CIDs */ - -struct acpi_compatible_id { - char value[ACPI_MAX_CID_LENGTH]; +struct acpica_device_id_list { + u32 count; /* Number of IDs in Ids array */ + u32 list_size; /* Size of list, including ID strings */ + struct acpica_device_id ids[1]; /* ID array */ }; -struct acpi_compatible_id_list { - u32 count; - u32 size; - struct acpi_compatible_id id[1]; +/* + * Structure returned from acpi_get_object_info. 
+ * Optimized for both 32- and 64-bit builds + */ +struct acpi_device_info { + u32 info_size; /* Size of info, including ID strings */ + u32 name; /* ACPI object Name */ + acpi_object_type type; /* ACPI object Type */ + u8 param_count; /* If a method, required parameter count */ + u8 valid; /* Indicates which optional fields are valid */ + u8 flags; /* Miscellaneous info */ + u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ + u8 lowest_dstates[5]; /* _sx_w values: 0xFF indicates not valid */ + u32 current_status; /* _STA value */ + acpi_integer address; /* _ADR value */ + struct acpica_device_id hardware_id; /* _HID value */ + struct acpica_device_id unique_id; /* _UID value */ + struct acpica_device_id_list compatible_id_list; /* _CID list */ }; -/* Structure and flags for acpi_get_object_info */ +/* Values for Flags field above (acpi_get_object_info) */ + +#define ACPI_PCI_ROOT_BRIDGE 0x01 -#define ACPI_VALID_STA 0x0001 -#define ACPI_VALID_ADR 0x0002 -#define ACPI_VALID_HID 0x0004 -#define ACPI_VALID_UID 0x0008 -#define ACPI_VALID_CID 0x0010 -#define ACPI_VALID_SXDS 0x0020 +/* Flags for Valid field above (acpi_get_object_info) */ + +#define ACPI_VALID_STA 0x01 +#define ACPI_VALID_ADR 0x02 +#define ACPI_VALID_HID 0x04 +#define ACPI_VALID_UID 0x08 +#define ACPI_VALID_CID 0x10 +#define ACPI_VALID_SXDS 0x20 +#define ACPI_VALID_SXWS 0x40 /* Flags for _STA method */ @@ -1011,29 +1033,6 @@ struct acpi_compatible_id_list { #define ACPI_STA_DEVICE_OK 0x08 /* Synonym */ #define ACPI_STA_BATTERY_PRESENT 0x10 -#define ACPI_COMMON_OBJ_INFO \ - acpi_object_type type; /* ACPI object type */ \ - acpi_name name /* ACPI object Name */ - -struct acpi_obj_info_header { - ACPI_COMMON_OBJ_INFO; -}; - -/* Structure returned from Get Object Info */ - -struct acpi_device_info { - ACPI_COMMON_OBJ_INFO; - - u32 param_count; /* If a method, required parameter count */ - u32 valid; /* Indicates which fields below are valid */ - u32 current_status; /* _STA value */ - acpi_integer address; /* _ADR value if any */ - struct acpica_device_id hardware_id; /* _HID value if any */ - struct acpica_device_id unique_id; /* _UID value if any */ - u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ - struct acpi_compatible_id_list compatibility_id; /* List of _CIDs if any */ -}; - /* Context structs for address space handlers */ struct acpi_pci_id { -- cgit v1.2.3 From 6557a49a443a347d24aed58076365432ded30edc Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 24 Jun 2009 11:32:04 +0800 Subject: ACPICA: ACPI 4.0: Interpreter support for IPMI. Adds support for IPMI which is similar to SMBus and uses a bi-directional data buffer. ACPICA BZ 773. 
http://acpica.org/bugzilla/show_bug.cgi?id=773 Signed-off-by: Lin Ming Signed-off-by: Bob Moore Signed-off-by: Len Brown --- drivers/acpi/acpica/acconfig.h | 3 +- drivers/acpi/acpica/exfield.c | 82 ++++++++++++++++++++++++++++-------------- drivers/acpi/acpica/exfldio.c | 7 ++-- include/acpi/actypes.h | 3 +- 4 files changed, 63 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index 9123d5a11627..8e679ef5b231 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -199,9 +199,10 @@ #define ACPI_RSDP_CHECKSUM_LENGTH 20 #define ACPI_RSDP_XCHECKSUM_LENGTH 36 -/* SMBus bidirectional buffer size */ +/* SMBus and IPMI bidirectional buffer size */ #define ACPI_SMBUS_BUFFER_SIZE 34 +#define ACPI_IPMI_BUFFER_SIZE 66 /* _sx_d and _sx_w control methods */ diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index 546dcdd86785..0b33d6c887b9 100644 --- a/drivers/acpi/acpica/exfield.c +++ b/drivers/acpi/acpica/exfield.c @@ -72,6 +72,7 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, union acpi_operand_object *buffer_desc; acpi_size length; void *buffer; + u32 function; ACPI_FUNCTION_TRACE_PTR(ex_read_data_from_field, obj_desc); @@ -97,13 +98,27 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, } } else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) && (obj_desc->field.region_obj->region.space_id == - ACPI_ADR_SPACE_SMBUS)) { + ACPI_ADR_SPACE_SMBUS + || obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_IPMI)) { /* - * This is an SMBus read. We must create a buffer to hold the data - * and directly access the region handler. + * This is an SMBus or IPMI read. We must create a buffer to hold + * the data and then directly access the region handler. + * + * Note: Smbus protocol value is passed in upper 16-bits of Function */ - buffer_desc = - acpi_ut_create_buffer_object(ACPI_SMBUS_BUFFER_SIZE); + if (obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_SMBUS) { + length = ACPI_SMBUS_BUFFER_SIZE; + function = + ACPI_READ | (obj_desc->field.attribute << 16); + } else { /* IPMI */ + + length = ACPI_IPMI_BUFFER_SIZE; + function = ACPI_READ; + } + + buffer_desc = acpi_ut_create_buffer_object(length); if (!buffer_desc) { return_ACPI_STATUS(AE_NO_MEMORY); } @@ -112,16 +127,13 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, acpi_ex_acquire_global_lock(obj_desc->common_field.field_flags); - /* - * Perform the read. - * Note: Smbus protocol value is passed in upper 16-bits of Function - */ + /* Call the region handler for the read */ + status = acpi_ex_access_region(obj_desc, 0, ACPI_CAST_PTR(acpi_integer, buffer_desc-> buffer.pointer), - ACPI_READ | (obj_desc->field. - attribute << 16)); + function); acpi_ex_release_global_lock(obj_desc->common_field.field_flags); goto exit; } @@ -212,6 +224,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, u32 length; void *buffer; union acpi_operand_object *buffer_desc; + u32 function; ACPI_FUNCTION_TRACE_PTR(ex_write_data_to_field, obj_desc); @@ -234,39 +247,56 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, } } else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) && (obj_desc->field.region_obj->region.space_id == - ACPI_ADR_SPACE_SMBUS)) { + ACPI_ADR_SPACE_SMBUS + || obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_IPMI)) { /* - * This is an SMBus write. 
We will bypass the entire field mechanism - * and handoff the buffer directly to the handler. + * This is an SMBus or IPMI write. We will bypass the entire field + * mechanism and handoff the buffer directly to the handler. For + * these address spaces, the buffer is bi-directional; on a write, + * return data is returned in the same buffer. + * + * Source must be a buffer of sufficient size: + * ACPI_SMBUS_BUFFER_SIZE or ACPI_IPMI_BUFFER_SIZE. * - * Source must be a buffer of sufficient size (ACPI_SMBUS_BUFFER_SIZE). + * Note: SMBus protocol type is passed in upper 16-bits of Function */ if (source_desc->common.type != ACPI_TYPE_BUFFER) { ACPI_ERROR((AE_INFO, - "SMBus write requires Buffer, found type %s", + "SMBus or IPMI write requires Buffer, found type %s", acpi_ut_get_object_type_name(source_desc))); return_ACPI_STATUS(AE_AML_OPERAND_TYPE); } - if (source_desc->buffer.length < ACPI_SMBUS_BUFFER_SIZE) { + if (obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_SMBUS) { + length = ACPI_SMBUS_BUFFER_SIZE; + function = + ACPI_WRITE | (obj_desc->field.attribute << 16); + } else { /* IPMI */ + + length = ACPI_IPMI_BUFFER_SIZE; + function = ACPI_WRITE; + } + + if (source_desc->buffer.length < length) { ACPI_ERROR((AE_INFO, - "SMBus write requires Buffer of length %X, found length %X", - ACPI_SMBUS_BUFFER_SIZE, - source_desc->buffer.length)); + "SMBus or IPMI write requires Buffer of length %X, found length %X", + length, source_desc->buffer.length)); return_ACPI_STATUS(AE_AML_BUFFER_LIMIT); } - buffer_desc = - acpi_ut_create_buffer_object(ACPI_SMBUS_BUFFER_SIZE); + /* Create the bi-directional buffer */ + + buffer_desc = acpi_ut_create_buffer_object(length); if (!buffer_desc) { return_ACPI_STATUS(AE_NO_MEMORY); } buffer = buffer_desc->buffer.pointer; - ACPI_MEMCPY(buffer, source_desc->buffer.pointer, - ACPI_SMBUS_BUFFER_SIZE); + ACPI_MEMCPY(buffer, source_desc->buffer.pointer, length); /* Lock entire transaction if requested */ @@ -275,12 +305,10 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, /* * Perform the write (returns status and perhaps data in the * same buffer) - * Note: SMBus protocol type is passed in upper 16-bits of Function. */ status = acpi_ex_access_region(obj_desc, 0, (acpi_integer *) buffer, - ACPI_WRITE | (obj_desc->field. 
- attribute << 16)); + function); acpi_ex_release_global_lock(obj_desc->common_field.field_flags); *result_desc = buffer_desc; diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index 6687be167f5f..d7b3b418fb45 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -120,12 +120,13 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, } /* - * Exit now for SMBus address space, it has a non-linear address space + * Exit now for SMBus or IPMI address space, it has a non-linear address space * and the request cannot be directly validated */ - if (rgn_desc->region.space_id == ACPI_ADR_SPACE_SMBUS) { + if (rgn_desc->region.space_id == ACPI_ADR_SPACE_SMBUS || + rgn_desc->region.space_id == ACPI_ADR_SPACE_IPMI) { - /* SMBus has a non-linear address space */ + /* SMBus or IPMI has a non-linear address space */ return_ACPI_STATUS(AE_OK); } diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 7a4ff79e238c..4371805d2def 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -732,7 +732,8 @@ typedef u8 acpi_adr_space_type; #define ACPI_ADR_SPACE_SMBUS (acpi_adr_space_type) 4 #define ACPI_ADR_SPACE_CMOS (acpi_adr_space_type) 5 #define ACPI_ADR_SPACE_PCI_BAR_TARGET (acpi_adr_space_type) 6 -#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 7 +#define ACPI_ADR_SPACE_IPMI (acpi_adr_space_type) 7 +#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 8 #define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 127 /* -- cgit v1.2.3 From 8e4319c425077c4cc540696a5bb6c4d12f017dcd Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 29 Jun 2009 13:43:27 +0800 Subject: ACPICA: Fix several acpi_attach_data problems Handler was never invoked. Now invoked if/when host node is deleted. Data object was not automatically deleted when host node was deleted. Interface to handler had an unused parameter, removed it. ACPICA BZ 778. http://acpica.org/bugzilla/show_bug.cgi?id=778 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acnamesp.h | 2 + drivers/acpi/acpica/nsalloc.c | 88 +++++++++++++++++++++++++++++------------- drivers/acpi/acpica/nsload.c | 3 +- drivers/acpi/bus.c | 2 +- drivers/acpi/glue.c | 2 +- drivers/acpi/scan.c | 2 +- include/acpi/acpi_bus.h | 4 +- include/acpi/actypes.h | 2 +- 8 files changed, 70 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 94cdc2b8cb93..a78e02f62d5e 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -144,6 +144,8 @@ struct acpi_namespace_node *acpi_ns_create_node(u32 name); void acpi_ns_delete_node(struct acpi_namespace_node *node); +void acpi_ns_remove_node(struct acpi_namespace_node *node); + void acpi_ns_delete_namespace_subtree(struct acpi_namespace_node *parent_handle); diff --git a/drivers/acpi/acpica/nsalloc.c b/drivers/acpi/acpica/nsalloc.c index efc971ab7d65..8a58a1b85aa0 100644 --- a/drivers/acpi/acpica/nsalloc.c +++ b/drivers/acpi/acpica/nsalloc.c @@ -96,17 +96,68 @@ struct acpi_namespace_node *acpi_ns_create_node(u32 name) * * RETURN: None * - * DESCRIPTION: Delete a namespace node + * DESCRIPTION: Delete a namespace node. All node deletions must come through + * here. Detaches any attached objects, including any attached + * data. If a handler is associated with attached data, it is + * invoked before the node is deleted. 
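[ Illustration: with this fix a client's handler really does run when the host node is deleted, so it becomes a safe place to reclaim attached data. Hedged sketch only; the names and the kfree-based context are hypothetical. ]

    static void my_data_handler(acpi_handle object, void *data)
    {
            /* Invoked from acpi_ns_delete_node() just before the node is freed */
            kfree(data);
    }

    static acpi_status my_bind_context(acpi_handle handle, void *context)
    {
            return acpi_attach_data(handle, my_data_handler, context);
    }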
* ******************************************************************************/ void acpi_ns_delete_node(struct acpi_namespace_node *node) +{ + union acpi_operand_object *obj_desc; + + ACPI_FUNCTION_NAME(ns_delete_node); + + /* Detach an object if there is one */ + + acpi_ns_detach_object(node); + + /* + * Delete an attached data object if present (an object that was created + * and attached via acpi_attach_data). Note: After any normal object is + * detached above, the only possible remaining object is a data object. + */ + obj_desc = node->object; + if (obj_desc && (obj_desc->common.type == ACPI_TYPE_LOCAL_DATA)) { + + /* Invoke the attached data deletion handler if present */ + + if (obj_desc->data.handler) { + obj_desc->data.handler(node, obj_desc->data.pointer); + } + + acpi_ut_remove_reference(obj_desc); + } + + /* Now we can delete the node */ + + (void)acpi_os_release_object(acpi_gbl_namespace_cache, node); + + ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); + ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, "Node %p, Remaining %X\n", + node, acpi_gbl_current_node_count)); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ns_remove_node + * + * PARAMETERS: Node - Node to be removed/deleted + * + * RETURN: None + * + * DESCRIPTION: Remove (unlink) and delete a namespace node + * + ******************************************************************************/ + +void acpi_ns_remove_node(struct acpi_namespace_node *node) { struct acpi_namespace_node *parent_node; struct acpi_namespace_node *prev_node; struct acpi_namespace_node *next_node; - ACPI_FUNCTION_TRACE_PTR(ns_delete_node, node); + ACPI_FUNCTION_TRACE_PTR(ns_remove_node, node); parent_node = acpi_ns_get_parent_node(node); @@ -142,12 +193,9 @@ void acpi_ns_delete_node(struct acpi_namespace_node *node) } } - ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); - - /* Detach an object if there is one, then delete the node */ + /* Delete the node and any attached objects */ - acpi_ns_detach_object(node); - (void)acpi_os_release_object(acpi_gbl_namespace_cache, node); + acpi_ns_delete_node(node); return_VOID; } @@ -273,25 +321,11 @@ void acpi_ns_delete_children(struct acpi_namespace_node *parent_node) parent_node, child_node)); } - /* Now we can free this child object */ - - ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); - - ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, - "Object %p, Remaining %X\n", child_node, - acpi_gbl_current_node_count)); - - /* Detach an object if there is one, then free the child node */ - - acpi_ns_detach_object(child_node); - - /* Now we can delete the node */ - - (void)acpi_os_release_object(acpi_gbl_namespace_cache, - child_node); - - /* And move on to the next child in the list */ - + /* + * Delete this child node and move on to the next child in the list. + * No need to unlink the node since we are deleting the entire branch. 
+ */ + acpi_ns_delete_node(child_node); child_node = next_node; } while (!(flags & ANOBJ_END_OF_PEER_LIST)); @@ -433,7 +467,7 @@ void acpi_ns_delete_namespace_by_owner(acpi_owner_id owner_id) if (deletion_node) { acpi_ns_delete_children(deletion_node); - acpi_ns_delete_node(deletion_node); + acpi_ns_remove_node(deletion_node); deletion_node = NULL; } diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c index dcd7a6adbbbc..a7234e60e985 100644 --- a/drivers/acpi/acpica/nsload.c +++ b/drivers/acpi/acpica/nsload.c @@ -270,8 +270,7 @@ static acpi_status acpi_ns_delete_subtree(acpi_handle start_handle) /* Now delete the starting object, and we are done */ - acpi_ns_delete_node(child_handle); - + acpi_ns_remove_node(child_handle); return_ACPI_STATUS(AE_OK); } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 2876fc70c3a9..620183f13e5e 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -141,7 +141,7 @@ int acpi_bus_get_status(struct acpi_device *device) EXPORT_SYMBOL(acpi_bus_get_status); void acpi_bus_private_data_handler(acpi_handle handle, - u32 function, void *context) + void *context) { return; } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 27a7072347ea..9a4ce33f137e 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -119,7 +119,7 @@ EXPORT_SYMBOL(acpi_get_child); /* Link ACPI devices with physical devices */ static void acpi_glue_data_handler(acpi_handle handle, - u32 function, void *context) + void *context) { /* we provide an empty handler */ } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 0ab526de7c55..9606af13d3b8 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -687,7 +687,7 @@ acpi_bus_get_ejd(acpi_handle handle, acpi_handle *ejd) } EXPORT_SYMBOL_GPL(acpi_bus_get_ejd); -void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context) +void acpi_bus_data_handler(acpi_handle handle, void *context) { /* TBD */ diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b91420b52c6f..6e83a68fbd7b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -312,7 +312,7 @@ struct acpi_bus_event { extern struct kobject *acpi_kobj; extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int); -void acpi_bus_private_data_handler(acpi_handle, u32, void *); +void acpi_bus_private_data_handler(acpi_handle, void *); int acpi_bus_get_private_data(acpi_handle, void **); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); @@ -325,7 +325,7 @@ extern void unregister_acpi_bus_notifier(struct notifier_block *nb); */ int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device); -void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context); +void acpi_bus_data_handler(acpi_handle handle, void *context); int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_get_power(acpi_handle handle, int *state); int acpi_bus_set_power(acpi_handle handle, int state); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 4371805d2def..ef4601149f49 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -922,7 +922,7 @@ typedef void (*acpi_notify_handler) (acpi_handle device, u32 value, void *context); typedef -void (*acpi_object_handler) (acpi_handle object, u32 function, void *data); +void (*acpi_object_handler) (acpi_handle object, void *data); typedef acpi_status(*acpi_init_handler) (acpi_handle object, u32 function); -- cgit v1.2.3 From 
eb2289ba1ba994de25af0d94b5e80ba93d2c1c3c Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 24 Jun 2009 13:42:00 +0800 Subject: ACPICA: ACPI 4.0: Changes for existing ACPI tables. FACS: new flag and new OspmFlags field. SRAT: x2APIC - add ClockDomain field to descriptor #2 Includes header and disassembler support. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 19 +++++++++++++------ include/acpi/actbl1.h | 8 +++++--- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 222733d01f36..0649a5670026 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -161,17 +161,24 @@ struct acpi_table_facs { u32 flags; u64 xfirmware_waking_vector; /* 64-bit version of the Firmware Waking Vector (ACPI 2.0+) */ u8 version; /* Version of this table (ACPI 2.0+) */ - u8 reserved[31]; /* Reserved, must be zero */ + u8 reserved[3]; /* Reserved, must be zero */ + u32 ospm_flags; /* Flags to be set by OSPM (ACPI 4.0) */ + u8 reserved1[24]; /* Reserved, must be zero */ }; -/* Flag macros */ +/* global_lock flags */ + +#define ACPI_GLOCK_PENDING (1) /* 00: Pending global lock ownership */ +#define ACPI_GLOCK_OWNED (1<<1) /* 01: Global lock is owned */ + +/* Flags */ -#define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ +#define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ +#define ACPI_FACS_64BIT_WAKE (1<<1) /* 01: 64-bit wake vector supported (ACPI 4.0) */ -/* Global lock flags */ +/* ospm_flags */ -#define ACPI_GLOCK_PENDING 0x01 /* 00: Pending global lock ownership */ -#define ACPI_GLOCK_OWNED 0x02 /* 01: Global lock is owned */ +#define ACPI_FACS_64BIT_ENVIRONMENT (1) /* 00: 64-bit wake environment is required (ACPI 4.0) */ /******************************************************************************* * diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 59ade0752473..ec36693f868c 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1011,7 +1011,7 @@ struct acpi_madt_interrupt_source { #define ACPI_MADT_CPEI_OVERRIDE (1) -/* 9: Processor Local X2_APIC (07/2008) */ +/* 9: Processor Local X2APIC (ACPI 4.0) */ struct acpi_madt_local_x2apic { struct acpi_subtable_header header; @@ -1021,7 +1021,7 @@ struct acpi_madt_local_x2apic { u32 uid; /* ACPI processor UID */ }; -/* 10: Local X2APIC NMI (07/2008) */ +/* 10: Local X2APIC NMI (ACPI 4.0) */ struct acpi_madt_local_x2apic_nmi { struct acpi_subtable_header header; @@ -1211,7 +1211,7 @@ struct acpi_srat_mem_affinity { #define ACPI_SRAT_MEM_HOT_PLUGGABLE (1<<1) /* 01: Memory region is hot pluggable */ #define ACPI_SRAT_MEM_NON_VOLATILE (1<<2) /* 02: Memory region is non-volatile */ -/* 2: Processor Local X2_APIC Affinity (07/2008) */ +/* 2: Processor Local X2_APIC Affinity (ACPI 4.0) */ struct acpi_srat_x2apic_cpu_affinity { struct acpi_subtable_header header; @@ -1219,6 +1219,8 @@ struct acpi_srat_x2apic_cpu_affinity { u32 proximity_domain; u32 apic_id; u32 flags; + u32 clock_domain; + u32 reserved2; }; /* Flags for struct acpi_srat_cpu_affinity and struct acpi_srat_x2apic_cpu_affinity */ -- cgit v1.2.3 From 3ce804ed83827a7fd27190836f9421b29ac64512 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 25 Jun 2009 10:31:32 -0700 Subject: ACPICA: Update version to 20090625 Update version number. 
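[ Aside: the ACPI_CA_VERSION word changed below stores the release date as hex nibbles that read as decimal digits, so a plain %x format recovers it; throwaway illustration, not kernel code. ]

    #include <stdio.h>

    int main(void)
    {
            unsigned int v = 0x20090625;    /* ACPI_CA_VERSION after this patch */

            /* Hex nibbles read as decimal digits: prints 2009.06.25 */
            printf("%04x.%02x.%02x\n", v >> 16, (v >> 8) & 0xff, v & 0xff);
            return 0;
    }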
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 04904c7f1aa1..063e577e791e 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090521 +#define ACPI_CA_VERSION 0x20090625 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3 From a5fe1a03f7720b8da8364a1737e1e5a357904e99 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 13 Aug 2009 10:43:27 +0800 Subject: ACPICA: fix leak of acpi_os_validate_address http://bugzilla.kernel.org/show_bug.cgi?id=13620 If the dynamic region is created and added to resource list over and over again, it has the potential to be a memory leak by growing the list every time. This patch fixes the memory leak, as below 1) add a new field "count" to struct acpi_res_list. When inserting, if the region(addr, len) is already in the resource list, we just increase "count", otherwise, the region is inserted with count=1. When deleting, the "count" is decreased, if it's decreased to 0, the region is deleted from the resource list. With "count", the region with same address and length can only be inserted to the resource list once, so prevent potential memory leak. 2) add a new function acpi_os_invalidate_address, which is called when region is deleted. Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/utdelete.c | 6 +++ drivers/acpi/osl.c | 94 ++++++++++++++++++++++++++++++++++++++++-- include/acpi/acpiosxf.h | 3 ++ 3 files changed, 100 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c index bc1710315088..96e26e70c63d 100644 --- a/drivers/acpi/acpica/utdelete.c +++ b/drivers/acpi/acpica/utdelete.c @@ -215,6 +215,12 @@ static void acpi_ut_delete_internal_obj(union acpi_operand_object *object) ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, "***** Region %p\n", object)); + /* Invalidate the region address/length via the host OS */ + + acpi_os_invalidate_address(object->region.space_id, + object->region.address, + (acpi_size) object->region.length); + second_desc = acpi_ns_get_secondary_object(object); if (second_desc) { /* diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 5691f165a952..c5b4f1ed9b71 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -88,6 +88,7 @@ struct acpi_res_list { char name[5]; /* only can have a length of 4 chars, make use of this one instead of res->name, no need to kalloc then */ struct list_head resource_list; + int count; }; static LIST_HEAD(resource_list_head); @@ -1358,6 +1359,89 @@ acpi_os_validate_interface (char *interface) return AE_SUPPORT; } +static inline int acpi_res_list_add(struct acpi_res_list *res) +{ + struct acpi_res_list *res_list_elem; + + list_for_each_entry(res_list_elem, &resource_list_head, + resource_list) { + + if (res->resource_type == res_list_elem->resource_type && + res->start == res_list_elem->start && + res->end == res_list_elem->end) { + + /* + * The Region(addr,len) already exist in the list, + * just increase the count + */ + + res_list_elem->count++; + return 0; + } + } + + res->count = 1; + list_add(&res->resource_list, &resource_list_head); + return 1; +} + +static inline void acpi_res_list_del(struct acpi_res_list *res) +{ + struct acpi_res_list *res_list_elem; + + list_for_each_entry(res_list_elem, 
&resource_list_head, + resource_list) { + + if (res->resource_type == res_list_elem->resource_type && + res->start == res_list_elem->start && + res->end == res_list_elem->end) { + + /* + * If the res count is decreased to 0, + * remove and free it + */ + + if (--res_list_elem->count == 0) { + list_del(&res_list_elem->resource_list); + kfree(res_list_elem); + } + return; + } + } +} + +acpi_status +acpi_os_invalidate_address( + u8 space_id, + acpi_physical_address address, + acpi_size length) +{ + struct acpi_res_list res; + + switch (space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + /* Only interference checks against SystemIO and SytemMemory + are needed */ + res.start = address; + res.end = address + length - 1; + res.resource_type = space_id; + spin_lock(&acpi_res_lock); + acpi_res_list_del(&res); + spin_unlock(&acpi_res_lock); + break; + case ACPI_ADR_SPACE_PCI_CONFIG: + case ACPI_ADR_SPACE_EC: + case ACPI_ADR_SPACE_SMBUS: + case ACPI_ADR_SPACE_CMOS: + case ACPI_ADR_SPACE_PCI_BAR_TARGET: + case ACPI_ADR_SPACE_DATA_TABLE: + case ACPI_ADR_SPACE_FIXED_HARDWARE: + break; + } + return AE_OK; +} + /****************************************************************************** * * FUNCTION: acpi_os_validate_address @@ -1382,6 +1466,7 @@ acpi_os_validate_address ( char *name) { struct acpi_res_list *res; + int added; if (acpi_enforce_resources == ENFORCE_RESOURCES_NO) return AE_OK; @@ -1399,14 +1484,17 @@ acpi_os_validate_address ( res->end = address + length - 1; res->resource_type = space_id; spin_lock(&acpi_res_lock); - list_add(&res->resource_list, &resource_list_head); + added = acpi_res_list_add(res); spin_unlock(&acpi_res_lock); - pr_debug("Added %s resource: start: 0x%llx, end: 0x%llx, " - "name: %s\n", (space_id == ACPI_ADR_SPACE_SYSTEM_IO) + pr_debug("%s %s resource: start: 0x%llx, end: 0x%llx, " + "name: %s\n", added ? "Added" : "Already exist", + (space_id == ACPI_ADR_SPACE_SYSTEM_IO) ? "SystemIO" : "System Memory", (unsigned long long)res->start, (unsigned long long)res->end, res->name); + if (!added) + kfree(res); break; case ACPI_ADR_SPACE_PCI_CONFIG: case ACPI_ADR_SPACE_EC: diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index ab0b85cf21f3..eb0e7189075f 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -245,6 +245,9 @@ acpi_status acpi_osi_invalidate(char* interface); acpi_status acpi_os_validate_address(u8 space_id, acpi_physical_address address, acpi_size length, char *name); +acpi_status +acpi_os_invalidate_address(u8 space_id, acpi_physical_address address, + acpi_size length); u64 acpi_os_get_timer(void); -- cgit v1.2.3 From f726f30e32305a34a203ff975e60885aa7556c6a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:24 +0000 Subject: dma: Add set_dma_mask hook to struct dma_map_ops POWERPC needs this hook. SPARC could use it too. 
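[ Illustration: a sketch of how an architecture could route dma_set_mask() through the new hook. The dispatch below is an assumption for illustration, modeled on the existing dma_supported hook; it is not part of this patch. ]

    int dma_set_mask(struct device *dev, u64 mask)
    {
            struct dma_map_ops *ops = get_dma_ops(dev);

            /* Prefer the per-bus hook when the architecture supplies one */
            if (ops && ops->set_dma_mask)
                    return ops->set_dma_mask(dev, mask);

            if (!dev->dma_mask || !dma_supported(dev, mask))
                    return -EIO;

            *dev->dma_mask = mask;
            return 0;
    }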
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- include/linux/dma-mapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index c0f6c3cd788c..91b761846061 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -58,6 +58,7 @@ struct dma_map_ops { enum dma_data_direction dir); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); + int (*set_dma_mask)(struct device *dev, u64 mask); int is_phys; }; -- cgit v1.2.3 From 468de9e54a900559b55aa939a4daeaea1915e572 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 27 Aug 2009 12:07:40 -0400 Subject: nfsd41: expand solo sequence check Compounds consisting of only a sequence operation don't need any additional caching beyond the sequence information we store in the slot entry. Fix nfsd4_is_solo_sequence to identify this case correctly. The additional check for a failed sequence in nfsd4_store_cache_entry() is redundant, since the nfsd4_is_solo_sequence call lower down catches this case. The final ce_cachethis set in nfsd4_sequence is also redundant. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 --------- include/linux/nfsd/xdr4.h | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5f634d24861c..b44a2cfde6f1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -991,16 +991,10 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; struct svc_rqst *rqstp = resp->rqstp; - struct nfsd4_compoundargs *args = rqstp->rq_argp; - struct nfsd4_op *op = &args->ops[resp->opcnt]; struct kvec *resv = &rqstp->rq_res.head[0]; dprintk("--> %s entry %p\n", __func__, entry); - /* Don't cache a failed OP_SEQUENCE. */ - if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) - return; - nfsd4_release_respages(entry->ce_respages, entry->ce_resused); entry->ce_opcnt = resp->opcnt; entry->ce_status = resp->cstate.status; @@ -1490,9 +1484,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, slot->sl_inuse = true; slot->sl_seqid = seq->seqid; slot->sl_cache_entry.ce_cachethis = seq->cachethis; - /* Always set the cache entry cachethis for solo sequence */ - if (nfsd4_is_solo_sequence(resp)) - slot->sl_cache_entry.ce_cachethis = 1; cstate->slot = slot; cstate->session = session; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 5e4beb0deb80..3f716607c86d 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -467,7 +467,7 @@ struct nfsd4_compoundres { static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) { struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; - return args->opcnt == 1; + return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; } static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) -- cgit v1.2.3 From 7285dd7fd375763bfb8ab1ac9cf3f1206f503c16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Aug 2009 20:25:24 +0200 Subject: clocksource: Resolve cpu hotplug dead lock with TSC unstable Martin Schwidefsky analyzed it: To register a clocksource the clocksource_mutex is acquired and if necessary timekeeping_notify is called to install the clocksource as the timekeeper clock. 
timekeeping_notify uses stop_machine which needs to take cpu_add_remove_lock mutex. Starting a new cpu is done with the cpu_add_remove_lock mutex held. native_cpu_up checks the tsc of the new cpu and if the tsc is no good clocksource_change_rating is called. Which needs the clocksource_mutex and the deadlock is complete. The solution is to replace the TSC via the clocksource watchdog mechanism. Mark the TSC as unstable and schedule the watchdog work so it gets removed in the watchdog thread context. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- arch/x86/kernel/tsc.c | 8 +++++--- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 968425422c46..fc3672a303d6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -767,12 +767,14 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); + printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else + clocksource_mark_unstable(&clocksource_tsc); + else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; clocksource_tsc.rating = 0; + } } } diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9ea40ff26f0e..83d2fbd81b93 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -277,6 +277,7 @@ extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); extern struct clocksource * __init __weak clocksource_default_clock(void); +extern void clocksource_mark_unstable(struct clocksource *cs); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0c86ad6e9fb..a0af4ffcb6e5 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -149,15 +149,42 @@ static void clocksource_watchdog_work(struct work_struct *work) kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); } -static void clocksource_unstable(struct clocksource *cs, int64_t delta) +static void __clocksource_unstable(struct clocksource *cs) { - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", - cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; schedule_work(&watchdog_work); } +static void clocksource_unstable(struct clocksource *cs, int64_t delta) +{ + printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", + cs->name, delta); + __clocksource_unstable(cs); +} + +/** + * clocksource_mark_unstable - mark clocksource unstable via watchdog + * @cs: clocksource to be marked unstable + * + * This function is called instead of clocksource_change_rating from + * cpu hotplug code to avoid a deadlock between the clocksource mutex + * and the cpu hotplug mutex. It defers the update of the clocksource + * to the watchdog thread. 
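[ Illustration of the two lock chains from the analysis above; not part of the patch. ]

    cpu_up():                            clocksource registration:
      cpu_add_remove_lock                  clocksource_mutex
        -> clocksource_change_rating()       -> timekeeping_notify()
           -> clocksource_mutex                 -> stop_machine()
              (blocks)                             -> cpu_add_remove_lock
                                                      (blocks)

    clocksource_mark_unstable() breaks the cycle: the hotplug path now takes
    only watchdog_lock and defers the rating change to the watchdog thread,
    which runs outside cpu_add_remove_lock.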
+ */ +void clocksource_mark_unstable(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) { + if (list_empty(&cs->wd_list)) + list_add(&cs->wd_list, &watchdog_list); + __clocksource_unstable(cs); + } + spin_unlock_irqrestore(&watchdog_lock, flags); +} + static void clocksource_watchdog(unsigned long data) { struct clocksource *cs; -- cgit v1.2.3 From b24aad44438d5bc21cbbfb94a99d9bf710d8295b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 24 Jul 2009 13:30:17 +0800 Subject: ACPICA: Split large ACPI table header Split out the non-acpi-defined ACPI tables into the existing (but empty) actbl2.h file. Preparation for new ACPI 4.0 tables. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 35 +-- include/acpi/actbl1.h | 553 +---------------------------------------------- include/acpi/actbl2.h | 585 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 611 insertions(+), 562 deletions(-) create mode 100644 include/acpi/actbl2.h (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 0649a5670026..55fcfc6725b2 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -44,6 +44,19 @@ #ifndef __ACTBL_H__ #define __ACTBL_H__ +/******************************************************************************* + * + * Fundamental ACPI tables + * + * This file contains definitions for the ACPI tables that are directly consumed + * by ACPICA. All other tables are consumed by the OS-dependent ACPI-related + * device drivers and other OS support code. + * + * The RSDP and FACS do not use the common ACPI table header. All other ACPI + * tables use the header. + * + ******************************************************************************/ + /* * Values for description table header signatures. Useful because they make * it more difficult to inadvertently type in the wrong signature. @@ -65,11 +78,6 @@ #pragma pack(1) /* - * These are the ACPI tables that are directly consumed by the subsystem. - * - * The RSDP and FACS do not use the common ACPI table header. All other ACPI - * tables use the header. - * * Note about bitfields: The u8 type is used for bitfields in ACPI tables. * This is the only type that is even remotely portable. Anything else is not * portable, so do not use any other bitfield types. @@ -77,9 +85,8 @@ /******************************************************************************* * - * ACPI Table Header. This common header is used by all tables except the - * RSDP and FACS. The define is used for direct inclusion of header into - * other ACPI tables + * Master ACPI Table Header. This common header is used by all ACPI tables + * except the RSDP and FACS. * ******************************************************************************/ @@ -95,13 +102,16 @@ struct acpi_table_header { u32 asl_compiler_revision; /* ASL compiler version */ }; -/* +/******************************************************************************* + * * GAS - Generic Address Structure (ACPI 2.0+) * * Note: Since this structure is used in the ACPI tables, it is byte aligned. - * If misalignment is not supported, access to the Address field must be - * performed with care. - */ + * If misaliged access is not supported by the hardware, accesses to the + * 64-bit Address field must be performed with care. 
+ * + ******************************************************************************/ + struct acpi_generic_address { u8 space_id; /* Address space where struct or register exists */ u8 bit_width; /* Size in bits of given register */ u8 bit_offset; /* Bit offset within the register */ u8 access_width; /* Minimum Access size (ACPI 3.0) */ u64 address; /* 64-bit address of struct or register */ }; @@ -325,5 +335,6 @@ struct acpi_table_desc { */ #include <acpi/actbl1.h> +#include <acpi/actbl2.h> #endif /* __ACTBL_H__ */ diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index ec36693f868c..582af1fcb8f5 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -46,41 +46,29 @@ /******************************************************************************* * - * Additional ACPI Tables + * Additional ACPI Tables (1) * * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * + * The tables in this file are fully defined within the ACPI specification. + * ******************************************************************************/ /* * Values for description table header signatures. Useful because they make * it more difficult to inadvertently type in the wrong signature. */ -#define ACPI_SIG_ASF "ASF!" /* Alert Standard Format table */ #define ACPI_SIG_BERT "BERT" /* Boot Error Record Table */ -#define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ #define ACPI_SIG_CPEP "CPEP" /* Corrected Platform Error Polling table */ -#define ACPI_SIG_DBGP "DBGP" /* Debug Port table */ -#define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ #define ACPI_SIG_ECDT "ECDT" /* Embedded Controller Boot Resources Table */ #define ACPI_SIG_EINJ "EINJ" /* Error Injection table */ #define ACPI_SIG_ERST "ERST" /* Error Record Serialization Table */ #define ACPI_SIG_HEST "HEST" /* Hardware Error Source Table */ -#define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ -#define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ #define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */ -#define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ -#define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ #define ACPI_SIG_SLIT "SLIT" /* System Locality Distance Information Table */ -#define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ -#define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_SRAT "SRAT" /* System Resource Affinity Table */ -#define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ -#define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ -#define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ -#define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ /* * All tables must be byte-packed to match the ACPI specification, since @@ -113,115 +101,6 @@ struct acpi_whea_header { u64 mask; /* Bitmask required for this register instruction */ }; -/******************************************************************************* - * - * ASF - Alert Standard Format table (Signature "ASF!") - * - * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 - * - ******************************************************************************/ - -struct acpi_table_asf { - struct acpi_table_header header; /* Common ACPI table header */ -}; - -/* ASF subtable header */ - -struct acpi_asf_header { - u8 type; - u8 reserved; - u16 length; -}; - -/* Values for Type field above */ - -enum acpi_asf_type { - ACPI_ASF_TYPE_INFO = 0, - ACPI_ASF_TYPE_ALERT = 1, - ACPI_ASF_TYPE_CONTROL = 2, -
ACPI_ASF_TYPE_BOOT = 3, - ACPI_ASF_TYPE_ADDRESS = 4, - ACPI_ASF_TYPE_RESERVED = 5 -}; - -/* - * ASF subtables - */ - -/* 0: ASF Information */ - -struct acpi_asf_info { - struct acpi_asf_header header; - u8 min_reset_value; - u8 min_poll_interval; - u16 system_id; - u32 mfg_id; - u8 flags; - u8 reserved2[3]; -}; - -/* 1: ASF Alerts */ - -struct acpi_asf_alert { - struct acpi_asf_header header; - u8 assert_mask; - u8 deassert_mask; - u8 alerts; - u8 data_length; -}; - -struct acpi_asf_alert_data { - u8 address; - u8 command; - u8 mask; - u8 value; - u8 sensor_type; - u8 type; - u8 offset; - u8 source_type; - u8 severity; - u8 sensor_number; - u8 entity; - u8 instance; -}; - -/* 2: ASF Remote Control */ - -struct acpi_asf_remote { - struct acpi_asf_header header; - u8 controls; - u8 data_length; - u16 reserved2; -}; - -struct acpi_asf_control_data { - u8 function; - u8 address; - u8 command; - u8 value; -}; - -/* 3: ASF RMCP Boot Options */ - -struct acpi_asf_rmcp { - struct acpi_asf_header header; - u8 capabilities[7]; - u8 completion_code; - u32 enterprise_id; - u8 command; - u16 parameter; - u16 boot_options; - u16 oem_parameters; -}; - -/* 4: ASF Address */ - -struct acpi_asf_address { - struct acpi_asf_header header; - u8 eprom_address; - u8 devices; -}; - /******************************************************************************* * * BERT - Boot Error Record Table @@ -251,18 +130,6 @@ struct acpi_bert_region { #define ACPI_BERT_MULTIPLE_UNCORRECTABLE (4) #define ACPI_BERT_MULTIPLE_CORRECTABLE (8) -/******************************************************************************* - * - * BOOT - Simple Boot Flag Table - * - ******************************************************************************/ - -struct acpi_table_boot { - struct acpi_table_header header; /* Common ACPI table header */ - u8 cmos_index; /* Index in CMOS RAM for the boot register */ - u8 reserved[3]; -}; - /******************************************************************************* * * CPEP - Corrected Platform Error Polling table @@ -284,123 +151,6 @@ struct acpi_cpep_polling { u32 interval; /* Polling interval (msec) */ }; -/******************************************************************************* - * - * DBGP - Debug Port table - * - ******************************************************************************/ - -struct acpi_table_dbgp { - struct acpi_table_header header; /* Common ACPI table header */ - u8 type; /* 0=full 16550, 1=subset of 16550 */ - u8 reserved[3]; - struct acpi_generic_address debug_port; -}; - -/******************************************************************************* - * - * DMAR - DMA Remapping table - * From "Intel Virtualization Technology for Directed I/O", Sept. 
2007 - * - ******************************************************************************/ - -struct acpi_table_dmar { - struct acpi_table_header header; /* Common ACPI table header */ - u8 width; /* Host Address Width */ - u8 flags; - u8 reserved[10]; -}; - -/* Flags */ - -#define ACPI_DMAR_INTR_REMAP (1) - -/* DMAR subtable header */ - -struct acpi_dmar_header { - u16 type; - u16 length; -}; - -/* Values for subtable type in struct acpi_dmar_header */ - -enum acpi_dmar_type { - ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, - ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, - ACPI_DMAR_TYPE_ATSR = 2, - ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ -}; - -struct acpi_dmar_device_scope { - u8 entry_type; - u8 length; - u16 reserved; - u8 enumeration_id; - u8 bus; -}; - -/* Values for entry_type in struct acpi_dmar_device_scope */ - -enum acpi_dmar_scope_type { - ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, - ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, - ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, - ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, - ACPI_DMAR_SCOPE_TYPE_HPET = 4, - ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ -}; - -struct acpi_dmar_pci_path { - u8 dev; - u8 fn; -}; - -/* - * DMAR Sub-tables, correspond to Type in struct acpi_dmar_header - */ - -/* 0: Hardware Unit Definition */ - -struct acpi_dmar_hardware_unit { - struct acpi_dmar_header header; - u8 flags; - u8 reserved; - u16 segment; - u64 address; /* Register Base Address */ -}; - -/* Flags */ - -#define ACPI_DMAR_INCLUDE_ALL (1) - -/* 1: Reserved Memory Defininition */ - -struct acpi_dmar_reserved_memory { - struct acpi_dmar_header header; - u16 reserved; - u16 segment; - u64 base_address; /* 4_k aligned base address */ - u64 end_address; /* 4_k aligned limit address */ -}; - -/* Flags */ - -#define ACPI_DMAR_ALLOW_ALL (1) - - -/* 2: Root Port ATS Capability Reporting Structure */ - -struct acpi_dmar_atsr { - struct acpi_dmar_header header; - u8 flags; - u8 reserved; - u16 segment; -}; - -/* Flags */ - -#define ACPI_DMAR_ALL_PORTS (1) - /******************************************************************************* * * ECDT - Embedded Controller Boot Resources Table @@ -762,119 +512,6 @@ struct acpi_hest_generic { u32 error_status_block_length; }; -/******************************************************************************* - * - * HPET - High Precision Event Timer table - * - ******************************************************************************/ - -struct acpi_table_hpet { - struct acpi_table_header header; /* Common ACPI table header */ - u32 id; /* Hardware ID of event timer block */ - struct acpi_generic_address address; /* Address of event timer block */ - u8 sequence; /* HPET sequence number */ - u16 minimum_tick; /* Main counter min tick, periodic mode */ - u8 flags; -}; - -/*! Flags */ - -#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ -#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ -#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ - -/*! 
[End] no source code translation !*/ - -/******************************************************************************* - * - * IBFT - Boot Firmware Table - * - ******************************************************************************/ - -struct acpi_table_ibft { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved[12]; -}; - -/* IBFT common subtable header */ - -struct acpi_ibft_header { - u8 type; - u8 version; - u16 length; - u8 index; - u8 flags; -}; - -/* Values for Type field above */ - -enum acpi_ibft_type { - ACPI_IBFT_TYPE_NOT_USED = 0, - ACPI_IBFT_TYPE_CONTROL = 1, - ACPI_IBFT_TYPE_INITIATOR = 2, - ACPI_IBFT_TYPE_NIC = 3, - ACPI_IBFT_TYPE_TARGET = 4, - ACPI_IBFT_TYPE_EXTENSIONS = 5, - ACPI_IBFT_TYPE_RESERVED = 6 /* 6 and greater are reserved */ -}; - -/* IBFT subtables */ - -struct acpi_ibft_control { - struct acpi_ibft_header header; - u16 extensions; - u16 initiator_offset; - u16 nic0_offset; - u16 target0_offset; - u16 nic1_offset; - u16 target1_offset; -}; - -struct acpi_ibft_initiator { - struct acpi_ibft_header header; - u8 sns_server[16]; - u8 slp_server[16]; - u8 primary_server[16]; - u8 secondary_server[16]; - u16 name_length; - u16 name_offset; -}; - -struct acpi_ibft_nic { - struct acpi_ibft_header header; - u8 ip_address[16]; - u8 subnet_mask_prefix; - u8 origin; - u8 gateway[16]; - u8 primary_dns[16]; - u8 secondary_dns[16]; - u8 dhcp[16]; - u16 vlan; - u8 mac_address[6]; - u16 pci_address; - u16 name_length; - u16 name_offset; -}; - -struct acpi_ibft_target { - struct acpi_ibft_header header; - u8 target_ip_address[16]; - u16 target_ip_socket; - u8 target_boot_lun[8]; - u8 chap_type; - u8 nic_association; - u16 target_name_length; - u16 target_name_offset; - u16 chap_name_length; - u16 chap_name_offset; - u16 chap_secret_length; - u16 chap_secret_offset; - u16 reverse_chap_name_length; - u16 reverse_chap_name_offset; - u16 reverse_chap_secret_length; - u16 reverse_chap_secret_offset; -}; - /******************************************************************************* * * MADT - Multiple APIC Description Table @@ -1056,27 +693,6 @@ struct acpi_madt_local_x2apic_nmi { #define ACPI_MADT_TRIGGER_RESERVED (2<<2) #define ACPI_MADT_TRIGGER_LEVEL (3<<2) -/******************************************************************************* - * - * MCFG - PCI Memory Mapped Configuration table and sub-table - * - ******************************************************************************/ - -struct acpi_table_mcfg { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved[8]; -}; - -/* Subtable */ - -struct acpi_mcfg_allocation { - u64 address; /* Base address, processor-relative */ - u16 pci_segment; /* PCI segment group number */ - u8 start_bus_number; /* Starting PCI Bus number */ - u8 end_bus_number; /* Final PCI Bus number */ - u32 reserved; -}; - /******************************************************************************* * * SBST - Smart Battery Specification Table @@ -1102,59 +718,6 @@ struct acpi_table_slit { u8 entry[1]; /* Real size = localities^2 */ }; -/******************************************************************************* - * - * SPCR - Serial Port Console Redirection table - * - ******************************************************************************/ - -struct acpi_table_spcr { - struct acpi_table_header header; /* Common ACPI table header */ - u8 interface_type; /* 0=full 16550, 1=subset of 16550 */ - u8 reserved[3]; - struct acpi_generic_address serial_port; - u8 interrupt_type; - u8 
pc_interrupt; - u32 interrupt; - u8 baud_rate; - u8 parity; - u8 stop_bits; - u8 flow_control; - u8 terminal_type; - u8 reserved1; - u16 pci_device_id; - u16 pci_vendor_id; - u8 pci_bus; - u8 pci_device; - u8 pci_function; - u32 pci_flags; - u8 pci_segment; - u32 reserved2; -}; - -/******************************************************************************* - * - * SPMI - Server Platform Management Interface table - * - ******************************************************************************/ - -struct acpi_table_spmi { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved; - u8 interface_type; - u16 spec_revision; /* Version of IPMI */ - u8 interrupt_type; - u8 gpe_number; /* GPE assigned */ - u8 reserved1; - u8 pci_device_flag; - u32 interrupt; - struct acpi_generic_address ipmi_register; - u8 pci_segment; - u8 pci_bus; - u8 pci_device; - u8 pci_function; -}; - /******************************************************************************* * * SRAT - System Resource Affinity Table @@ -1227,116 +790,6 @@ struct acpi_srat_x2apic_cpu_affinity { #define ACPI_SRAT_CPU_ENABLED (1) /* 00: Use affinity structure */ -/******************************************************************************* - * - * TCPA - Trusted Computing Platform Alliance table - * - ******************************************************************************/ - -struct acpi_table_tcpa { - struct acpi_table_header header; /* Common ACPI table header */ - u16 reserved; - u32 max_log_length; /* Maximum length for the event log area */ - u64 log_address; /* Address of the event log area */ -}; - -/******************************************************************************* - * - * UEFI - UEFI Boot optimization Table - * - ******************************************************************************/ - -struct acpi_table_uefi { - struct acpi_table_header header; /* Common ACPI table header */ - u8 identifier[16]; /* UUID identifier */ - u16 data_offset; /* Offset of remaining data in table */ - u8 data; -}; - -/******************************************************************************* - * - * WDAT - Watchdog Action Table - * - ******************************************************************************/ - -struct acpi_table_wdat { - struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u16 pci_segment; /* PCI Segment number */ - u8 pci_bus; /* PCI Bus number */ - u8 pci_device; /* PCI Device number */ - u8 pci_function; /* PCI Function number */ - u8 reserved[3]; - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved2[3]; - u32 entries; /* Number of watchdog entries that follow */ -}; - -/* WDAT Instruction Entries (actions) */ - -struct acpi_wdat_entry { - struct acpi_whea_header whea_header; /* Common header for WHEA tables */ -}; - -/* Values for Action field above */ - -enum acpi_wdat_actions { - ACPI_WDAT_RESET = 1, - ACPI_WDAT_GET_CURRENT_COUNTDOWN = 4, - ACPI_WDAT_GET_COUNTDOWN = 5, - ACPI_WDAT_SET_COUNTDOWN = 6, - ACPI_WDAT_GET_RUNNING_STATE = 8, - ACPI_WDAT_SET_RUNNING_STATE = 9, - ACPI_WDAT_GET_STOPPED_STATE = 10, - ACPI_WDAT_SET_STOPPED_STATE = 11, - ACPI_WDAT_GET_REBOOT = 16, - ACPI_WDAT_SET_REBOOT = 17, - ACPI_WDAT_GET_SHUTDOWN = 18, - ACPI_WDAT_SET_SHUTDOWN = 19, - ACPI_WDAT_GET_STATUS = 32, - ACPI_WDAT_SET_STATUS = 33, - ACPI_WDAT_ACTION_RESERVED = 34 /* 34 and greater are 
reserved */ -}; - -/* Values for Instruction field above */ - -enum acpi_wdat_instructions { - ACPI_WDAT_READ_VALUE = 0, - ACPI_WDAT_READ_COUNTDOWN = 1, - ACPI_WDAT_WRITE_VALUE = 2, - ACPI_WDAT_WRITE_COUNTDOWN = 3, - ACPI_WDAT_INSTRUCTION_RESERVED = 4, /* 4 and greater are reserved */ - ACPI_WDAT_PRESERVE_REGISTER = 0x80 /* Except for this value */ -}; - -/******************************************************************************* - * - * WDRT - Watchdog Resource Table - * - ******************************************************************************/ - -struct acpi_table_wdrt { - struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u8 pci_segment; /* PCI Segment number */ - u8 pci_bus; /* PCI Bus number */ - u8 pci_device; /* PCI Device number */ - u8 pci_function; /* PCI Function number */ - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved[3]; - u32 entries; /* Number of watchdog entries that follow */ -}; - -/* Flags */ - -#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ - /* Reset to default packing */ #pragma pack() diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h new file mode 100644 index 000000000000..b271aba0e524 --- /dev/null +++ b/include/acpi/actbl2.h @@ -0,0 +1,585 @@ +#ifndef __ACTBL2_H__ +#define __ACTBL2_H__ + +/******************************************************************************* + * + * Additional ACPI Tables (2) + * + * These tables are not consumed directly by the ACPICA subsystem, but are + * included here to support device drivers and the AML disassembler. + * + * The tables in this file are defined by third-party specifications, and are + * not defined directly by the ACPI specification itself. + * + ******************************************************************************/ + +/* + * Values for description table header signatures. Useful because they make + * it more difficult to inadvertently type in the wrong signature. + */ +#define ACPI_SIG_ASF "ASF!" /* Alert Standard Format table */ +#define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ +#define ACPI_SIG_DBGP "DBGP" /* Debug Port table */ +#define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ +#define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ +#define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ +#define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ +#define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ +#define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ +#define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ +#define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ +#define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ +#define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ +#define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ + +/* + * All tables must be byte-packed to match the ACPI specification, since + * the tables are provided by the system BIOS. + */ +#pragma pack(1) + +/* + * Note about bitfields: The u8 type is used for bitfields in ACPI tables. + * This is the only type that is even remotely portable. Anything else is not + * portable, so do not use any other bitfield types. 
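+ *
+ * For example: with #pragma pack(1) in effect, sizeof(struct
+ * acpi_generic_address) is 12 bytes (four u8 fields plus a u64); with
+ * natural alignment most compilers would pad it to 16, and the layout
+ * would no longer match the byte-packed tables the firmware provides.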
+ */ + +/******************************************************************************* + * + * ASF - Alert Standard Format table (Signature "ASF!") + * + * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 + * + ******************************************************************************/ + +struct acpi_table_asf { + struct acpi_table_header header; /* Common ACPI table header */ +}; + +/* ASF subtable header */ + +struct acpi_asf_header { + u8 type; + u8 reserved; + u16 length; +}; + +/* Values for Type field above */ + +enum acpi_asf_type { + ACPI_ASF_TYPE_INFO = 0, + ACPI_ASF_TYPE_ALERT = 1, + ACPI_ASF_TYPE_CONTROL = 2, + ACPI_ASF_TYPE_BOOT = 3, + ACPI_ASF_TYPE_ADDRESS = 4, + ACPI_ASF_TYPE_RESERVED = 5 +}; + +/* + * ASF subtables + */ + +/* 0: ASF Information */ + +struct acpi_asf_info { + struct acpi_asf_header header; + u8 min_reset_value; + u8 min_poll_interval; + u16 system_id; + u32 mfg_id; + u8 flags; + u8 reserved2[3]; +}; + +/* 1: ASF Alerts */ + +struct acpi_asf_alert { + struct acpi_asf_header header; + u8 assert_mask; + u8 deassert_mask; + u8 alerts; + u8 data_length; +}; + +struct acpi_asf_alert_data { + u8 address; + u8 command; + u8 mask; + u8 value; + u8 sensor_type; + u8 type; + u8 offset; + u8 source_type; + u8 severity; + u8 sensor_number; + u8 entity; + u8 instance; +}; + +/* 2: ASF Remote Control */ + +struct acpi_asf_remote { + struct acpi_asf_header header; + u8 controls; + u8 data_length; + u16 reserved2; +}; + +struct acpi_asf_control_data { + u8 function; + u8 address; + u8 command; + u8 value; +}; + +/* 3: ASF RMCP Boot Options */ + +struct acpi_asf_rmcp { + struct acpi_asf_header header; + u8 capabilities[7]; + u8 completion_code; + u32 enterprise_id; + u8 command; + u16 parameter; + u16 boot_options; + u16 oem_parameters; +}; + +/* 4: ASF Address */ + +struct acpi_asf_address { + struct acpi_asf_header header; + u8 eprom_address; + u8 devices; +}; + +/******************************************************************************* + * + * BOOT - Simple Boot Flag Table + * + ******************************************************************************/ + +struct acpi_table_boot { + struct acpi_table_header header; /* Common ACPI table header */ + u8 cmos_index; /* Index in CMOS RAM for the boot register */ + u8 reserved[3]; +}; + +/******************************************************************************* + * + * DBGP - Debug Port table + * + ******************************************************************************/ + +struct acpi_table_dbgp { + struct acpi_table_header header; /* Common ACPI table header */ + u8 type; /* 0=full 16550, 1=subset of 16550 */ + u8 reserved[3]; + struct acpi_generic_address debug_port; +}; + +/******************************************************************************* + * + * DMAR - DMA Remapping table + * From "Intel Virtualization Technology for Directed I/O", Sept. 
2007 + * + ******************************************************************************/ + +struct acpi_table_dmar { + struct acpi_table_header header; /* Common ACPI table header */ + u8 width; /* Host Address Width */ + u8 flags; + u8 reserved[10]; +}; + +/* Flags */ + +#define ACPI_DMAR_INTR_REMAP (1) + +/* DMAR subtable header */ + +struct acpi_dmar_header { + u16 type; + u16 length; +}; + +/* Values for subtable type in struct acpi_dmar_header */ + +enum acpi_dmar_type { + ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, + ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, + ACPI_DMAR_TYPE_ATSR = 2, + ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ +}; + +struct acpi_dmar_device_scope { + u8 entry_type; + u8 length; + u16 reserved; + u8 enumeration_id; + u8 bus; +}; + +/* Values for entry_type in struct acpi_dmar_device_scope */ + +enum acpi_dmar_scope_type { + ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, + ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, + ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, + ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, + ACPI_DMAR_SCOPE_TYPE_HPET = 4, + ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ +}; + +struct acpi_dmar_pci_path { + u8 dev; + u8 fn; +}; + +/* + * DMAR Sub-tables, correspond to Type in struct acpi_dmar_header + */ + +/* 0: Hardware Unit Definition */ + +struct acpi_dmar_hardware_unit { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; + u64 address; /* Register Base Address */ +}; + +/* Flags */ + +#define ACPI_DMAR_INCLUDE_ALL (1) + +/* 1: Reserved Memory Defininition */ + +struct acpi_dmar_reserved_memory { + struct acpi_dmar_header header; + u16 reserved; + u16 segment; + u64 base_address; /* 4_k aligned base address */ + u64 end_address; /* 4_k aligned limit address */ +}; + +/* Flags */ + +#define ACPI_DMAR_ALLOW_ALL (1) + +/* 2: Root Port ATS Capability Reporting Structure */ + +struct acpi_dmar_atsr { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; +}; + +/* Flags */ + +#define ACPI_DMAR_ALL_PORTS (1) + +/******************************************************************************* + * + * HPET - High Precision Event Timer table + * + ******************************************************************************/ + +struct acpi_table_hpet { + struct acpi_table_header header; /* Common ACPI table header */ + u32 id; /* Hardware ID of event timer block */ + struct acpi_generic_address address; /* Address of event timer block */ + u8 sequence; /* HPET sequence number */ + u16 minimum_tick; /* Main counter min tick, periodic mode */ + u8 flags; +}; + +/*! Flags */ + +#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ +#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ +#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ + +/*! 
[End] no source code translation !*/ + +/******************************************************************************* + * + * IBFT - Boot Firmware Table + * + ******************************************************************************/ + +struct acpi_table_ibft { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved[12]; +}; + +/* IBFT common subtable header */ + +struct acpi_ibft_header { + u8 type; + u8 version; + u16 length; + u8 index; + u8 flags; +}; + +/* Values for Type field above */ + +enum acpi_ibft_type { + ACPI_IBFT_TYPE_NOT_USED = 0, + ACPI_IBFT_TYPE_CONTROL = 1, + ACPI_IBFT_TYPE_INITIATOR = 2, + ACPI_IBFT_TYPE_NIC = 3, + ACPI_IBFT_TYPE_TARGET = 4, + ACPI_IBFT_TYPE_EXTENSIONS = 5, + ACPI_IBFT_TYPE_RESERVED = 6 /* 6 and greater are reserved */ +}; + +/* IBFT subtables */ + +struct acpi_ibft_control { + struct acpi_ibft_header header; + u16 extensions; + u16 initiator_offset; + u16 nic0_offset; + u16 target0_offset; + u16 nic1_offset; + u16 target1_offset; +}; + +struct acpi_ibft_initiator { + struct acpi_ibft_header header; + u8 sns_server[16]; + u8 slp_server[16]; + u8 primary_server[16]; + u8 secondary_server[16]; + u16 name_length; + u16 name_offset; +}; + +struct acpi_ibft_nic { + struct acpi_ibft_header header; + u8 ip_address[16]; + u8 subnet_mask_prefix; + u8 origin; + u8 gateway[16]; + u8 primary_dns[16]; + u8 secondary_dns[16]; + u8 dhcp[16]; + u16 vlan; + u8 mac_address[6]; + u16 pci_address; + u16 name_length; + u16 name_offset; +}; + +struct acpi_ibft_target { + struct acpi_ibft_header header; + u8 target_ip_address[16]; + u16 target_ip_socket; + u8 target_boot_lun[8]; + u8 chap_type; + u8 nic_association; + u16 target_name_length; + u16 target_name_offset; + u16 chap_name_length; + u16 chap_name_offset; + u16 chap_secret_length; + u16 chap_secret_offset; + u16 reverse_chap_name_length; + u16 reverse_chap_name_offset; + u16 reverse_chap_secret_length; + u16 reverse_chap_secret_offset; +}; + +/******************************************************************************* + * + * MCFG - PCI Memory Mapped Configuration table and sub-table + * + ******************************************************************************/ + +struct acpi_table_mcfg { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved[8]; +}; + +/* Subtable */ + +struct acpi_mcfg_allocation { + u64 address; /* Base address, processor-relative */ + u16 pci_segment; /* PCI segment group number */ + u8 start_bus_number; /* Starting PCI Bus number */ + u8 end_bus_number; /* Final PCI Bus number */ + u32 reserved; +}; + +/******************************************************************************* + * + * SPCR - Serial Port Console Redirection table + * + ******************************************************************************/ + +struct acpi_table_spcr { + struct acpi_table_header header; /* Common ACPI table header */ + u8 interface_type; /* 0=full 16550, 1=subset of 16550 */ + u8 reserved[3]; + struct acpi_generic_address serial_port; + u8 interrupt_type; + u8 pc_interrupt; + u32 interrupt; + u8 baud_rate; + u8 parity; + u8 stop_bits; + u8 flow_control; + u8 terminal_type; + u8 reserved1; + u16 pci_device_id; + u16 pci_vendor_id; + u8 pci_bus; + u8 pci_device; + u8 pci_function; + u32 pci_flags; + u8 pci_segment; + u32 reserved2; +}; + +/******************************************************************************* + * + * SPMI - Server Platform Management Interface table + * + 
******************************************************************************/ + +struct acpi_table_spmi { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved; + u8 interface_type; + u16 spec_revision; /* Version of IPMI */ + u8 interrupt_type; + u8 gpe_number; /* GPE assigned */ + u8 reserved1; + u8 pci_device_flag; + u32 interrupt; + struct acpi_generic_address ipmi_register; + u8 pci_segment; + u8 pci_bus; + u8 pci_device; + u8 pci_function; +}; + +/******************************************************************************* + * + * TCPA - Trusted Computing Platform Alliance table + * + ******************************************************************************/ + +struct acpi_table_tcpa { + struct acpi_table_header header; /* Common ACPI table header */ + u16 reserved; + u32 max_log_length; /* Maximum length for the event log area */ + u64 log_address; /* Address of the event log area */ +}; + +/******************************************************************************* + * + * UEFI - UEFI Boot optimization Table + * + ******************************************************************************/ + +struct acpi_table_uefi { + struct acpi_table_header header; /* Common ACPI table header */ + u8 identifier[16]; /* UUID identifier */ + u16 data_offset; /* Offset of remaining data in table */ + u8 data; +}; + +/******************************************************************************* + * + * WDAT - Watchdog Action Table + * + ******************************************************************************/ + +struct acpi_table_wdat { + struct acpi_table_header header; /* Common ACPI table header */ + u32 header_length; /* Watchdog Header Length */ + u16 pci_segment; /* PCI Segment number */ + u8 pci_bus; /* PCI Bus number */ + u8 pci_device; /* PCI Device number */ + u8 pci_function; /* PCI Function number */ + u8 reserved[3]; + u32 timer_period; /* Period of one timer count (msec) */ + u32 max_count; /* Maximum counter value supported */ + u32 min_count; /* Minimum counter value */ + u8 flags; + u8 reserved2[3]; + u32 entries; /* Number of watchdog entries that follow */ +}; + +/* WDAT Instruction Entries (actions) */ + +struct acpi_wdat_entry { + struct acpi_whea_header whea_header; /* Common header for WHEA tables */ +}; + +/* Values for Action field above */ + +enum acpi_wdat_actions { + ACPI_WDAT_RESET = 1, + ACPI_WDAT_GET_CURRENT_COUNTDOWN = 4, + ACPI_WDAT_GET_COUNTDOWN = 5, + ACPI_WDAT_SET_COUNTDOWN = 6, + ACPI_WDAT_GET_RUNNING_STATE = 8, + ACPI_WDAT_SET_RUNNING_STATE = 9, + ACPI_WDAT_GET_STOPPED_STATE = 10, + ACPI_WDAT_SET_STOPPED_STATE = 11, + ACPI_WDAT_GET_REBOOT = 16, + ACPI_WDAT_SET_REBOOT = 17, + ACPI_WDAT_GET_SHUTDOWN = 18, + ACPI_WDAT_SET_SHUTDOWN = 19, + ACPI_WDAT_GET_STATUS = 32, + ACPI_WDAT_SET_STATUS = 33, + ACPI_WDAT_ACTION_RESERVED = 34 /* 34 and greater are reserved */ +}; + +/* Values for Instruction field above */ + +enum acpi_wdat_instructions { + ACPI_WDAT_READ_VALUE = 0, + ACPI_WDAT_READ_COUNTDOWN = 1, + ACPI_WDAT_WRITE_VALUE = 2, + ACPI_WDAT_WRITE_COUNTDOWN = 3, + ACPI_WDAT_INSTRUCTION_RESERVED = 4, /* 4 and greater are reserved */ + ACPI_WDAT_PRESERVE_REGISTER = 0x80 /* Except for this value */ +}; + +/******************************************************************************* + * + * WDRT - Watchdog Resource Table + * + ******************************************************************************/ + +struct acpi_table_wdrt { + struct acpi_table_header header; /* Common ACPI table header */ + u32 header_length; 
/* Watchdog Header Length */ + u8 pci_segment; /* PCI Segment number */ + u8 pci_bus; /* PCI Bus number */ + u8 pci_device; /* PCI Device number */ + u8 pci_function; /* PCI Function number */ + u32 timer_period; /* Period of one timer count (msec) */ + u32 max_count; /* Maximum counter value supported */ + u32 min_count; /* Minimum counter value */ + u8 flags; + u8 reserved[3]; + u32 entries; /* Number of watchdog entries that follow */ +}; + +/* Flags */ + +#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ + +/* Reset to default packing */ + +#pragma pack() + +#endif /* __ACTBL2_H__ */ -- cgit v1.2.3 From 6e2d5ebd0d36199920676fdceaff4f4bfe66297b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 27 Jul 2009 10:53:00 +0800 Subject: ACPICA: ACPI 4: Update headers for new and changed ACPI tables. Add IVRS,MSCT,UEFI,WAET,WDAT. Updated several existing tables for ACPI 4.0-related changes. Added document references for all tables not defined in ACPI spec. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 30 +++-- include/acpi/actbl1.h | 339 +++++++++++++++++++++++++++++++++----------------- include/acpi/actbl2.h | 339 +++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 557 insertions(+), 151 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 55fcfc6725b2..1b6587952604 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -58,8 +58,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. 
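+ *
+ * (For example, a header can be matched with a four-byte compare against
+ * one of these defines -- e.g. ACPI_COMPARE_NAME(table->signature,
+ * ACPI_SIG_FADT) within ACPICA, or a plain memcmp() -- instead of an
+ * open-coded "FACP" string.)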
*/ #define ACPI_SIG_DSDT "DSDT" /* Differentiated System Description Table */ #define ACPI_SIG_FADT "FACP" /* Fixed ACPI Description Table */ @@ -123,6 +124,7 @@ struct acpi_generic_address { /******************************************************************************* * * RSDP - Root System Description Pointer (Signature is "RSD PTR ") + * Version 2 * ******************************************************************************/ @@ -143,6 +145,7 @@ struct acpi_table_rsdp { /******************************************************************************* * * RSDT/XSDT - Root System Description Tables + * Version 1 (both) * ******************************************************************************/ @@ -176,23 +179,24 @@ struct acpi_table_facs { u8 reserved1[24]; /* Reserved, must be zero */ }; -/* global_lock flags */ +/* Masks for global_lock flag field above */ #define ACPI_GLOCK_PENDING (1) /* 00: Pending global lock ownership */ #define ACPI_GLOCK_OWNED (1<<1) /* 01: Global lock is owned */ -/* Flags */ +/* Masks for Flags field above */ #define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ #define ACPI_FACS_64BIT_WAKE (1<<1) /* 01: 64-bit wake vector supported (ACPI 4.0) */ -/* ospm_flags */ +/* Masks for ospm_flags field above */ #define ACPI_FACS_64BIT_ENVIRONMENT (1) /* 00: 64-bit wake environment is required (ACPI 4.0) */ /******************************************************************************* * * FADT - Fixed ACPI Description Table (Signature "FACP") + * Version 4 * ******************************************************************************/ @@ -253,7 +257,7 @@ struct acpi_table_fadt { struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */ }; -/* FADT Boot Architecture Flags (boot_flags) */ +/* Masks for FADT Boot Architecture Flags (boot_flags) */ #define ACPI_FADT_LEGACY_DEVICES (1) /* 00: [V2] System has LPC or ISA bus devices */ #define ACPI_FADT_8042 (1<<1) /* 01: [V3] System has an 8042 controller on port 60/64 */ @@ -263,7 +267,7 @@ struct acpi_table_fadt { #define FADT2_REVISION_ID 3 -/* FADT flags */ +/* Masks for FADT flags */ #define ACPI_FADT_WBINVD (1) /* 00: [V1] The wbinvd instruction works properly */ #define ACPI_FADT_WBINVD_FLUSH (1<<1) /* 01: [V1] wbinvd flushes but does not invalidate caches */ @@ -286,7 +290,7 @@ struct acpi_table_fadt { #define ACPI_FADT_APIC_CLUSTER (1<<18) /* 18: [V4] All local APICs must use cluster model (ACPI 3.0) */ #define ACPI_FADT_APIC_PHYSICAL (1<<19) /* 19: [V4] All local x_aPICs must use physical dest mode (ACPI 3.0) */ -/* FADT Prefered Power Management Profiles */ +/* Values for preferred_profile (Prefered Power Management Profiles) */ enum acpi_prefered_pm_profiles { PM_UNSPECIFIED = 0, @@ -304,14 +308,16 @@ enum acpi_prefered_pm_profiles { #define ACPI_FADT_OFFSET(f) (u8) ACPI_OFFSET (struct acpi_table_fadt, f) +/* + * Internal table-related structures + */ union acpi_name_union { u32 integer; char ascii[4]; }; -/* - * Internal ACPI Table Descriptor. One per ACPI table - */ +/* Internal ACPI Table Descriptor. One per ACPI table. 
*/ + struct acpi_table_desc { acpi_physical_address address; struct acpi_table_header *pointer; @@ -321,7 +327,7 @@ struct acpi_table_desc { u8 flags; }; -/* Flags for above */ +/* Masks for Flags field above */ #define ACPI_TABLE_ORIGIN_UNKNOWN (0) #define ACPI_TABLE_ORIGIN_MAPPED (1) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 582af1fcb8f5..0417f2abc44b 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -56,8 +56,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. */ #define ACPI_SIG_BERT "BERT" /* Boot Error Record Table */ #define ACPI_SIG_CPEP "CPEP" /* Corrected Platform Error Polling table */ @@ -66,6 +67,7 @@ #define ACPI_SIG_ERST "ERST" /* Error Record Serialization Table */ #define ACPI_SIG_HEST "HEST" /* Hardware Error Source Table */ #define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */ +#define ACPI_SIG_MSCT "MSCT" /* Maximum System Characteristics Table */ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ #define ACPI_SIG_SLIT "SLIT" /* System Locality Distance Information Table */ #define ACPI_SIG_SRAT "SRAT" /* System Resource Affinity Table */ @@ -82,14 +84,20 @@ * portable, so do not use any other bitfield types. */ -/* Common Subtable header (used in MADT, SRAT, etc.) */ +/******************************************************************************* + * + * Common subtable headers + * + ******************************************************************************/ + +/* Generic subtable header (used in MADT, SRAT, etc.) 
*/ struct acpi_subtable_header { u8 type; u8 length; }; -/* Common Subtable header for WHEA tables (EINJ, ERST, WDAT) */ +/* Subtable header for WHEA tables (EINJ, ERST, WDAT) */ struct acpi_whea_header { u8 action; @@ -103,7 +111,8 @@ struct acpi_whea_header { /******************************************************************************* * - * BERT - Boot Error Record Table + * BERT - Boot Error Record Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -113,26 +122,43 @@ struct acpi_table_bert { u64 address; /* Physical addresss of the error region */ }; -/* Boot Error Region */ +/* Boot Error Region (not a subtable, pointed to by Address field above) */ struct acpi_bert_region { - u32 block_status; - u32 raw_data_offset; - u32 raw_data_length; - u32 data_length; - u32 error_severity; + u32 block_status; /* Type of error information */ + u32 raw_data_offset; /* Offset to raw error data */ + u32 raw_data_length; /* Length of raw error data */ + u32 data_length; /* Length of generic error data */ + u32 error_severity; /* Severity code */ }; -/* block_status Flags */ +/* Values for block_status flags above */ #define ACPI_BERT_UNCORRECTABLE (1) -#define ACPI_BERT_CORRECTABLE (2) -#define ACPI_BERT_MULTIPLE_UNCORRECTABLE (4) -#define ACPI_BERT_MULTIPLE_CORRECTABLE (8) +#define ACPI_BERT_CORRECTABLE (1<<1) +#define ACPI_BERT_MULTIPLE_UNCORRECTABLE (1<<2) +#define ACPI_BERT_MULTIPLE_CORRECTABLE (1<<3) +#define ACPI_BERT_ERROR_ENTRY_COUNT (0xFF<<4) /* 8 bits, error count */ + +/* Values for error_severity above */ + +enum acpi_bert_error_severity { + ACPI_BERT_ERROR_CORRECTABLE = 0, + ACPI_BERT_ERROR_FATAL = 1, + ACPI_BERT_ERROR_CORRECTED = 2, + ACPI_BERT_ERROR_NONE = 3, + ACPI_BERT_ERROR_RESERVED = 4 /* 4 and greater are reserved */ +}; + +/* + * Note: The generic error data that follows the error_severity field above + * uses the struct acpi_hest_generic_data defined under the HEST table below + */ /******************************************************************************* * - * CPEP - Corrected Platform Error Polling table + * CPEP - Corrected Platform Error Polling table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -144,8 +170,7 @@ struct acpi_table_cpep { /* Subtable */ struct acpi_cpep_polling { - u8 type; - u8 length; + struct acpi_subtable_header header; u8 id; /* Processor ID */ u8 eid; /* Processor EID */ u32 interval; /* Polling interval (msec) */ @@ -154,6 +179,7 @@ struct acpi_cpep_polling { /******************************************************************************* * * ECDT - Embedded Controller Boot Resources Table + * Version 1 * ******************************************************************************/ @@ -168,14 +194,16 @@ struct acpi_table_ecdt { /******************************************************************************* * - * EINJ - Error Injection Table + * EINJ - Error Injection Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ struct acpi_table_einj { struct acpi_table_header header; /* Common ACPI table header */ u32 header_length; - u32 reserved; + u8 flags; + u8 reserved[3]; u32 entries; }; @@ -185,6 +213,10 @@ struct acpi_einj_entry { struct acpi_whea_header whea_header; /* Common header for WHEA tables */ }; +/* Masks for Flags field above */ + +#define ACPI_EINJ_PRESERVE (1) + /* Values for Action field above */ enum acpi_einj_actions { @@ -220,9 +252,34 
@@ struct acpi_einj_trigger { u32 entry_count; }; +/* Command status return values */ + +enum acpi_einj_command_status { + ACPI_EINJ_SUCCESS = 0, + ACPI_EINJ_FAILURE = 1, + ACPI_EINJ_INVALID_ACCESS = 2, + ACPI_EINJ_STATUS_RESERVED = 3 /* 3 and greater are reserved */ +}; + +/* Error types returned from ACPI_EINJ_GET_ERROR_TYPE (bitfield) */ + +#define ACPI_EINJ_PROCESSOR_CORRECTABLE (1) +#define ACPI_EINJ_PROCESSOR_UNCORRECTABLE (1<<1) +#define ACPI_EINJ_PROCESSOR_FATAL (1<<2) +#define ACPI_EINJ_MEMORY_CORRECTABLE (1<<3) +#define ACPI_EINJ_MEMORY_UNCORRECTABLE (1<<4) +#define ACPI_EINJ_MEMORY_FATAL (1<<5) +#define ACPI_EINJ_PCIX_CORRECTABLE (1<<6) +#define ACPI_EINJ_PCIX_UNCORRECTABLE (1<<7) +#define ACPI_EINJ_PCIX_FATAL (1<<8) +#define ACPI_EINJ_PLATFORM_CORRECTABLE (1<<9) +#define ACPI_EINJ_PLATFORM_UNCORRECTABLE (1<<10) +#define ACPI_EINJ_PLATFORM_FATAL (1<<11) + /******************************************************************************* * - * ERST - Error Record Serialization Table + * ERST - Error Record Serialization Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -239,19 +296,23 @@ struct acpi_erst_entry { struct acpi_whea_header whea_header; /* Common header for WHEA tables */ }; +/* Masks for Flags field above */ + +#define ACPI_ERST_PRESERVE (1) + /* Values for Action field above */ enum acpi_erst_actions { - ACPI_ERST_BEGIN_WRITE_OPERATION = 0, - ACPI_ERST_BEGIN_READ_OPERATION = 1, - ACPI_ERST_BETGIN_CLEAR_OPERATION = 2, - ACPI_ERST_END_OPERATION = 3, + ACPI_ERST_BEGIN_WRITE = 0, + ACPI_ERST_BEGIN_READ = 1, + ACPI_ERST_BEGIN_CLEAR = 2, + ACPI_ERST_END = 3, ACPI_ERST_SET_RECORD_OFFSET = 4, ACPI_ERST_EXECUTE_OPERATION = 5, ACPI_ERST_CHECK_BUSY_STATUS = 6, ACPI_ERST_GET_COMMAND_STATUS = 7, - ACPI_ERST_GET_RECORD_IDENTIFIER = 8, - ACPI_ERST_SET_RECORD_IDENTIFIER = 9, + ACPI_ERST_GET_RECORD_ID = 8, + ACPI_ERST_SET_RECORD_ID = 9, ACPI_ERST_GET_RECORD_COUNT = 10, ACPI_ERST_BEGIN_DUMMY_WRIITE = 11, ACPI_ERST_NOT_USED = 12, @@ -286,9 +347,29 @@ enum acpi_erst_instructions { ACPI_ERST_INSTRUCTION_RESERVED = 19 /* 19 and greater are reserved */ }; +/* Command status return values */ + +enum acpi_erst_command_status { + ACPI_ERST_SUCESS = 0, + ACPI_ERST_NO_SPACE = 1, + ACPI_ERST_NOT_AVAILABLE = 2, + ACPI_ERST_FAILURE = 3, + ACPI_ERST_RECORD_EMPTY = 4, + ACPI_ERST_NOT_FOUND = 5, + ACPI_ERST_STATUS_RESERVED = 6 /* 6 and greater are reserved */ +}; + +/* Error Record Serialization Information */ + +struct acpi_erst_info { + u16 signature; /* Should be "ER" */ + u8 data[48]; +}; + /******************************************************************************* * - * HEST - Hardware Error Source Table + * HEST - Hardware Error Source Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -301,70 +382,49 @@ struct acpi_table_hest { struct acpi_hest_header { u16 type; + u16 source_id; }; /* Values for Type field above for subtables */ enum acpi_hest_types { - ACPI_HEST_TYPE_XPF_MACHINE_CHECK = 0, - ACPI_HEST_TYPE_XPF_CORRECTED_MACHINE_CHECK = 1, - ACPI_HEST_TYPE_XPF_UNUSED = 2, - ACPI_HEST_TYPE_XPF_NON_MASKABLE_INTERRUPT = 3, - ACPI_HEST_TYPE_IPF_CORRECTED_MACHINE_CHECK = 4, - ACPI_HEST_TYPE_IPF_CORRECTED_PLATFORM_ERROR = 5, + ACPI_HEST_TYPE_IA32_CHECK = 0, + ACPI_HEST_TYPE_IA32_CORRECTED_CHECK = 1, + ACPI_HEST_TYPE_IA32_NMI = 2, + ACPI_HEST_TYPE_NOT_USED3 = 3, + ACPI_HEST_TYPE_NOT_USED4 = 4, + ACPI_HEST_TYPE_NOT_USED5 = 5, ACPI_HEST_TYPE_AER_ROOT_PORT = 6, 
ACPI_HEST_TYPE_AER_ENDPOINT = 7, ACPI_HEST_TYPE_AER_BRIDGE = 8, - ACPI_HEST_TYPE_GENERIC_HARDWARE_ERROR_SOURCE = 9, + ACPI_HEST_TYPE_GENERIC_ERROR = 9, ACPI_HEST_TYPE_RESERVED = 10 /* 10 and greater are reserved */ }; /* - * HEST Sub-subtables + * HEST substructures contained in subtables */ -/* XPF Machine Check Error Bank */ - -struct acpi_hest_xpf_error_bank { +/* + * IA32 Error Bank(s) - Follows the struct acpi_hest_ia_machine_check and + * struct acpi_hest_ia_corrected structures. + */ +struct acpi_hest_ia_error_bank { u8 bank_number; u8 clear_status_on_init; u8 status_format; - u8 config_write_enable; + u8 reserved; u32 control_register; - u64 control_init_data; + u64 control_data; u32 status_register; u32 address_register; u32 misc_register; }; -/* Generic Error Status */ - -struct acpi_hest_generic_status { - u32 block_status; - u32 raw_data_offset; - u32 raw_data_length; - u32 data_length; - u32 error_severity; -}; - -/* Generic Error Data */ - -struct acpi_hest_generic_data { - u8 section_type[16]; - u32 error_severity; - u16 revision; - u8 validation_bits; - u8 flags; - u32 error_data_length; - u8 fru_id[16]; - u8 fru_text[20]; -}; - -/* Common HEST structure for PCI/AER types below (6,7,8) */ +/* Common HEST sub-structure for PCI/AER structures below (6,7,8) */ struct acpi_hest_aer_common { - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; u8 enabled; u32 records_to_pre_allocate; @@ -373,13 +433,18 @@ struct acpi_hest_aer_common { u16 device; u16 function; u16 device_control; - u16 reserved; + u16 reserved2; u32 uncorrectable_error_mask; u32 uncorrectable_error_severity; u32 correctable_error_mask; u32 advanced_error_capabilities; }; +/* Masks for HEST Flags fields */ + +#define ACPI_HEST_FIRMWARE_FIRST (1) +#define ACPI_HEST_GLOBAL (1<<1) + /* Hardware Error Notification */ struct acpi_hest_notify { @@ -405,71 +470,59 @@ enum acpi_hest_notify_types { ACPI_HEST_NOTIFY_RESERVED = 5 /* 5 and greater are reserved */ }; +/* Values for config_write_enable bitfield above */ + +#define ACPI_HEST_TYPE (1) +#define ACPI_HEST_POLL_INTERVAL (1<<1) +#define ACPI_HEST_POLL_THRESHOLD_VALUE (1<<2) +#define ACPI_HEST_POLL_THRESHOLD_WINDOW (1<<3) +#define ACPI_HEST_ERR_THRESHOLD_VALUE (1<<4) +#define ACPI_HEST_ERR_THRESHOLD_WINDOW (1<<5) + /* * HEST subtables - * - * From WHEA Design Document, 16 May 2007. - * Note: There is no subtable type 2 in this version of the document, - * and there are two different subtable type 3s. 
*/ - /* 0: XPF Machine Check Exception */ +/* 0: IA32 Machine Check Exception */ -struct acpi_hest_xpf_machine_check { +struct acpi_hest_ia_machine_check { struct acpi_hest_header header; - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; - u8 reserved1; + u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; u64 global_capability_data; u64 global_control_data; u8 num_hardware_banks; - u8 reserved2[7]; + u8 reserved3[7]; }; -/* 1: XPF Corrected Machine Check */ +/* 1: IA32 Corrected Machine Check */ -struct acpi_table_hest_xpf_corrected { +struct acpi_table_hest_ia_corrected { struct acpi_hest_header header; - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; struct acpi_hest_notify notify; u8 num_hardware_banks; - u8 reserved[3]; + u8 reserved2[3]; }; -/* 3: XPF Non-Maskable Interrupt */ +/* 2: IA32 Non-Maskable Interrupt */ -struct acpi_hest_xpf_nmi { +struct acpi_hest_ia_nmi { struct acpi_hest_header header; - u16 source_id; u32 reserved; u32 records_to_pre_allocate; u32 max_sections_per_record; u32 max_raw_data_length; }; -/* 4: IPF Corrected Machine Check */ - -struct acpi_hest_ipf_corrected { - struct acpi_hest_header header; - u8 enabled; - u8 reserved; -}; - -/* 5: IPF Corrected Platform Error */ - -struct acpi_hest_ipf_corrected_platform { - struct acpi_hest_header header; - u8 enabled; - u8 reserved; -}; +/* 3,4,5: Not used */ /* 6: PCI Express Root Port AER */ @@ -491,30 +544,61 @@ struct acpi_hest_aer { struct acpi_hest_aer_bridge { struct acpi_hest_header header; struct acpi_hest_aer_common aer; - u32 secondary_uncorrectable_error_mask; - u32 secondary_uncorrectable_error_severity; - u32 secondary_advanced_capabilities; + u32 second_uncorrectable_error_mask; + u32 second_uncorrectable_error_severity; + u32 second_advanced_capabilities; }; /* 9: Generic Hardware Error Source */ struct acpi_hest_generic { struct acpi_hest_header header; - u16 source_id; u16 related_source_id; - u8 config_write_enable; + u8 reserved; u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; u32 max_raw_data_length; struct acpi_generic_address error_status_address; struct acpi_hest_notify notify; - u32 error_status_block_length; + u32 error_block_length; +}; + +/* Generic Error Status block */ + +struct acpi_hest_generic_status { + u32 block_status; + u32 raw_data_offset; + u32 raw_data_length; + u32 data_length; + u32 error_severity; +}; + +/* Values for block_status flags above */ + +#define ACPI_HEST_UNCORRECTABLE (1) +#define ACPI_HEST_CORRECTABLE (1<<1) +#define ACPI_HEST_MULTIPLE_UNCORRECTABLE (1<<2) +#define ACPI_HEST_MULTIPLE_CORRECTABLE (1<<3) +#define ACPI_HEST_ERROR_ENTRY_COUNT (0xFF<<4) /* 8 bits, error count */ + +/* Generic Error Data entry */ + +struct acpi_hest_generic_data { + u8 section_type[16]; + u32 error_severity; + u16 revision; + u8 validation_bits; + u8 flags; + u32 error_data_length; + u8 fru_id[16]; + u8 fru_text[20]; }; /******************************************************************************* * * MADT - Multiple APIC Description Table + * Version 3 * ******************************************************************************/ @@ -524,16 +608,16 @@ struct acpi_table_madt { u32 flags; }; -/* Flags */ +/* Masks for Flags field above */ -#define ACPI_MADT_PCAT_COMPAT (1) /* 00: System also has dual 8259s */ +#define ACPI_MADT_PCAT_COMPAT (1) /* 00: System also has dual 8259s */ /* Values for PCATCompat flag */ #define 
ACPI_MADT_DUAL_PIC 0 #define ACPI_MADT_MULTIPLE_APIC 1 -/* Values for subtable type in struct acpi_subtable_header */ +/* Values for MADT subtable type in struct acpi_subtable_header */ enum acpi_madt_type { ACPI_MADT_TYPE_LOCAL_APIC = 0, @@ -644,7 +728,7 @@ struct acpi_madt_interrupt_source { u32 flags; /* Interrupt Source Flags */ }; -/* Flags field above */ +/* Masks for Flags field above */ #define ACPI_MADT_CPEI_OVERRIDE (1) @@ -693,9 +777,36 @@ struct acpi_madt_local_x2apic_nmi { #define ACPI_MADT_TRIGGER_RESERVED (2<<2) #define ACPI_MADT_TRIGGER_LEVEL (3<<2) +/******************************************************************************* + * + * MSCT - Maximum System Characteristics Table (ACPI 4.0) + * Version 1 + * + ******************************************************************************/ + +struct acpi_table_msct { + struct acpi_table_header header; /* Common ACPI table header */ + u32 proximity_offset; /* Location of proximity info struct(s) */ + u32 max_proximity_domains; /* Max number of proximity domains */ + u32 max_clock_domains; /* Max number of clock domains */ + u64 max_address; /* Max physical address in system */ +}; + +/* Subtable - Maximum Proximity Domain Information. Version 1 */ + +struct acpi_msct_proximity { + u8 revision; + u8 length; + u32 range_start; /* Start of domain range */ + u32 range_end; /* End of domain range */ + u32 processor_capacity; + u64 memory_capacity; /* In bytes */ +}; + /******************************************************************************* * * SBST - Smart Battery Specification Table + * Version 1 * ******************************************************************************/ @@ -709,6 +820,7 @@ struct acpi_table_sbst { /******************************************************************************* * * SLIT - System Locality Distance Information Table + * Version 1 * ******************************************************************************/ @@ -721,6 +833,7 @@ struct acpi_table_slit { /******************************************************************************* * * SRAT - System Resource Affinity Table + * Version 3 * ******************************************************************************/ @@ -755,6 +868,10 @@ struct acpi_srat_cpu_affinity { u32 reserved; /* Reserved, must be zero */ }; +/* Flags */ + +#define ACPI_SRAT_CPU_USE_AFFINITY (1) /* 00: Use affinity structure */ + /* 1: Memory Affinity */ struct acpi_srat_mem_affinity { diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index b271aba0e524..6f3dce9991e1 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -14,8 +14,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. */ #define ACPI_SIG_ASF "ASF!" 
/* Alert Standard Format table */ #define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ @@ -23,12 +24,14 @@ #define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ #define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ #define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ +#define ACPI_SIG_IVRS "IVRS" /* I/O Virtualization Reporting Structure */ #define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ #define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ #define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ #define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ #define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ +#define ACPI_SIG_WAET "WAET" /* Windows ACPI Emulated devices Table */ #define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ #define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ @@ -47,6 +50,7 @@ /******************************************************************************* * * ASF - Alert Standard Format table (Signature "ASF!") + * Revision 0x10 * * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 * @@ -91,6 +95,10 @@ struct acpi_asf_info { u8 reserved2[3]; }; +/* Masks for Flags field above */ + +#define ACPI_ASF_SMBUS_PROTOCOLS (1) + /* 1: ASF Alerts */ struct acpi_asf_alert { @@ -156,6 +164,9 @@ struct acpi_asf_address { /******************************************************************************* * * BOOT - Simple Boot Flag Table + * Version 1 + * + * Conforms to the "Simple Boot Flag Specification", Version 2.1 * ******************************************************************************/ @@ -168,6 +179,9 @@ struct acpi_table_boot { /******************************************************************************* * * DBGP - Debug Port table + * Version 1 + * + * Conforms to the "Debug Port Specification", Version 1.00, 2/9/2000 * ******************************************************************************/ @@ -181,7 +195,10 @@ struct acpi_table_dbgp { /******************************************************************************* * * DMAR - DMA Remapping table - * From "Intel Virtualization Technology for Directed I/O", Sept. 2007 + * Version 1 + * + * Conforms to "Intel Virtualization Technology for Directed I/O", + * Version 1.2, Sept. 
2008 * ******************************************************************************/ @@ -192,7 +209,7 @@ struct acpi_table_dmar { u8 reserved[10]; }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_INTR_REMAP (1) @@ -209,9 +226,12 @@ enum acpi_dmar_type { ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, ACPI_DMAR_TYPE_ATSR = 2, - ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ + ACPI_DMAR_HARDWARE_AFFINITY = 3, + ACPI_DMAR_TYPE_RESERVED = 4 /* 4 and greater are reserved */ }; +/* DMAR Device Scope structure */ + struct acpi_dmar_device_scope { u8 entry_type; u8 length; @@ -250,7 +270,7 @@ struct acpi_dmar_hardware_unit { u64 address; /* Register Base Address */ }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_INCLUDE_ALL (1) @@ -264,7 +284,7 @@ struct acpi_dmar_reserved_memory { u64 end_address; /* 4_k aligned limit address */ }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_ALLOW_ALL (1) @@ -277,13 +297,26 @@ struct acpi_dmar_atsr { u16 segment; }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_ALL_PORTS (1) +/* 3: Remapping Hardware Static Affinity Structure */ + +struct acpi_dmar_rhsa { + struct acpi_dmar_header header; + u32 reserved; + u64 base_address; + u32 proximity_domain; +}; + /******************************************************************************* * * HPET - High Precision Event Timer table + * Version 1 + * + * Conforms to "IA-PC HPET (High Precision Event Timers) Specification", + * Version 1.0a, October 2004 * ******************************************************************************/ @@ -296,17 +329,28 @@ struct acpi_table_hpet { u8 flags; }; -/*! Flags */ +/* Masks for Flags field above */ -#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ -#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ -#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ +#define ACPI_HPET_PAGE_PROTECT_MASK (3) -/*! [End] no source code translation !*/ +/* Values for Page Protect flags */ + +enum acpi_hpet_page_protect { + ACPI_HPET_NO_PAGE_PROTECT = 0, + ACPI_HPET_PAGE_PROTECT4 = 1, + ACPI_HPET_PAGE_PROTECT64 = 2 +}; /******************************************************************************* * * IBFT - Boot Firmware Table + * Version 1 + * + * Conforms to "iSCSI Boot Firmware Table (iBFT) as Defined in ACPI 3.0b + * Specification", Version 1.01, March 1, 2007 + * + * Note: It appears that this table is not intended to appear in the RSDT/XSDT. + * Therefore, it is not currently supported by the disassembler. * ******************************************************************************/ @@ -394,9 +438,184 @@ struct acpi_ibft_target { u16 reverse_chap_secret_offset; }; +/******************************************************************************* + * + * IVRS - I/O Virtualization Reporting Structure + * Version 1 + * + * Conforms to "AMD I/O Virtualization Technology (IOMMU) Specification", + * Revision 1.26, February 2009. 
+ * + ******************************************************************************/ + +struct acpi_table_ivrs { + struct acpi_table_header header; /* Common ACPI table header */ + u32 info; /* Common virtualization info */ + u64 reserved; +}; + +/* Values for Info field above */ + +#define ACPI_IVRS_PHYSICAL_SIZE 0x00007F00 /* 7 bits, physical address size */ +#define ACPI_IVRS_VIRTUAL_SIZE 0x003F8000 /* 7 bits, virtual address size */ +#define ACPI_IVRS_ATS_RESERVED 0x00400000 /* ATS address translation range reserved */ + +/* IVRS subtable header */ + +struct acpi_ivrs_header { + u8 type; /* Subtable type */ + u8 flags; + u16 length; /* Subtable length */ + u16 device_id; /* ID of IOMMU */ +}; + +/* Values for subtable Type above */ + +enum acpi_ivrs_type { + ACPI_IVRS_TYPE_HARDWARE = 0x10, + ACPI_IVRS_TYPE_MEMORY1 = 0x20, + ACPI_IVRS_TYPE_MEMORY2 = 0x21, + ACPI_IVRS_TYPE_MEMORY3 = 0x22 +}; + +/* Masks for Flags field above for IVHD subtable */ + +#define ACPI_IVHD_TT_ENABLE (1) +#define ACPI_IVHD_PASS_PW (1<<1) +#define ACPI_IVHD_RES_PASS_PW (1<<2) +#define ACPI_IVHD_ISOC (1<<3) +#define ACPI_IVHD_IOTLB (1<<4) + +/* Masks for Flags field above for IVMD subtable */ + +#define ACPI_IVMD_UNITY (1) +#define ACPI_IVMD_READ (1<<1) +#define ACPI_IVMD_WRITE (1<<2) +#define ACPI_IVMD_EXCLUSION_RANGE (1<<3) + +/* + * IVRS subtables, correspond to Type in struct acpi_ivrs_header + */ + +/* 0x10: I/O Virtualization Hardware Definition Block (IVHD) */ + +struct acpi_ivrs_hardware { + struct acpi_ivrs_header header; + u16 capability_offset; /* Offset for IOMMU control fields */ + u64 base_address; /* IOMMU control registers */ + u16 pci_segment_group; + u16 info; /* MSI number and unit ID */ + u32 reserved; +}; + +/* Masks for Info field above */ + +#define ACPI_IVHD_MSI_NUMBER_MASK 0x001F /* 5 bits, MSI message number */ +#define ACPI_IVHD_UNIT_ID_MASK 0x1F00 /* 5 bits, unit_iD */ + +/* + * Device Entries for IVHD subtable, appear after struct acpi_ivrs_hardware structure. + * Upper two bits of the Type field are the (encoded) length of the structure. + * Currently, only 4 and 8 byte entries are defined. 16 and 32 byte entries + * are reserved for future use but not defined. 
+ */ +struct acpi_ivrs_de_header { + u8 type; + u16 id; + u8 data_setting; +}; + +/* Length of device entry is in the top two bits of Type field above */ + +#define ACPI_IVHD_ENTRY_LENGTH 0xC0 + +/* Values for device entry Type field above */ + +enum acpi_ivrs_device_entry_type { + /* 4-byte device entries, all use struct acpi_ivrs_device4 */ + + ACPI_IVRS_TYPE_PAD4 = 0, + ACPI_IVRS_TYPE_ALL = 1, + ACPI_IVRS_TYPE_SELECT = 2, + ACPI_IVRS_TYPE_START = 3, + ACPI_IVRS_TYPE_END = 4, + + /* 8-byte device entries */ + + ACPI_IVRS_TYPE_PAD8 = 64, + ACPI_IVRS_TYPE_NOT_USED = 65, + ACPI_IVRS_TYPE_ALIAS_SELECT = 66, /* Uses struct acpi_ivrs_device8a */ + ACPI_IVRS_TYPE_ALIAS_START = 67, /* Uses struct acpi_ivrs_device8a */ + ACPI_IVRS_TYPE_EXT_SELECT = 70, /* Uses struct acpi_ivrs_device8b */ + ACPI_IVRS_TYPE_EXT_START = 71, /* Uses struct acpi_ivrs_device8b */ + ACPI_IVRS_TYPE_SPECIAL = 72 /* Uses struct acpi_ivrs_device8c */ +}; + +/* Values for Data field above */ + +#define ACPI_IVHD_INIT_PASS (1) +#define ACPI_IVHD_EINT_PASS (1<<1) +#define ACPI_IVHD_NMI_PASS (1<<2) +#define ACPI_IVHD_SYSTEM_MGMT (3<<4) +#define ACPI_IVHD_LINT0_PASS (1<<6) +#define ACPI_IVHD_LINT1_PASS (1<<7) + +/* Types 0-4: 4-byte device entry */ + +struct acpi_ivrs_device4 { + struct acpi_ivrs_de_header header; +}; + +/* Types 66-67: 8-byte device entry */ + +struct acpi_ivrs_device8a { + struct acpi_ivrs_de_header header; + u8 reserved1; + u16 used_id; + u8 reserved2; +}; + +/* Types 70-71: 8-byte device entry */ + +struct acpi_ivrs_device8b { + struct acpi_ivrs_de_header header; + u32 extended_data; +}; + +/* Values for extended_data above */ + +#define ACPI_IVHD_ATS_DISABLED (1<<31) + +/* Type 72: 8-byte device entry */ + +struct acpi_ivrs_device8c { + struct acpi_ivrs_de_header header; + u8 handle; + u16 used_id; + u8 variety; +}; + +/* Values for Variety field above */ + +#define ACPI_IVHD_IOAPIC 1 +#define ACPI_IVHD_HPET 2 + +/* 0x20, 0x21, 0x22: I/O Virtualization Memory Definition Block (IVMD) */ + +struct acpi_ivrs_memory { + struct acpi_ivrs_header header; + u16 aux_data; + u64 reserved; + u64 start_address; + u64 memory_length; +}; + /******************************************************************************* * * MCFG - PCI Memory Mapped Configuration table and sub-table + * Version 1 + * + * Conforms to "PCI Firmware Specification", Revision 3.0, June 20, 2005 * ******************************************************************************/ @@ -418,6 +637,10 @@ struct acpi_mcfg_allocation { /******************************************************************************* * * SPCR - Serial Port Console Redirection table + * Version 1 + * + * Conforms to "Serial Port Console Redirection Table", + * Version 1.00, January 11, 2002 * ******************************************************************************/ @@ -445,16 +668,25 @@ struct acpi_table_spcr { u32 reserved2; }; +/* Masks for pci_flags field above */ + +#define ACPI_SPCR_DO_NOT_DISABLE (1) + /******************************************************************************* * * SPMI - Server Platform Management Interface table + * Version 5 + * + * Conforms to "Intelligent Platform Management Interface Specification + * Second Generation v2.0", Document Revision 1.0, February 12, 2004 with + * June 12, 2009 markup. 
* ******************************************************************************/ struct acpi_table_spmi { struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved; u8 interface_type; + u8 reserved; /* Must be 1 */ u16 spec_revision; /* Version of IPMI */ u8 interrupt_type; u8 gpe_number; /* GPE assigned */ @@ -466,11 +698,27 @@ struct acpi_table_spmi { u8 pci_bus; u8 pci_device; u8 pci_function; + u8 reserved2; +}; + +/* Values for interface_type above */ + +enum acpi_spmi_interface_types { + ACPI_SPMI_NOT_USED = 0, + ACPI_SPMI_KEYBOARD = 1, + ACPI_SPMI_SMI = 2, + ACPI_SPMI_BLOCK_TRANSFER = 3, + ACPI_SPMI_SMBUS = 4, + ACPI_SPMI_RESERVED = 5 /* 5 and above are reserved */ }; /******************************************************************************* * * TCPA - Trusted Computing Platform Alliance table + * Version 1 + * + * Conforms to "TCG PC Specific Implementation Specification", + * Version 1.1, August 18, 2003 * ******************************************************************************/ @@ -484,6 +732,10 @@ struct acpi_table_tcpa { /******************************************************************************* * * UEFI - UEFI Boot optimization Table + * Version 1 + * + * Conforms to "Unified Extensible Firmware Interface Specification", + * Version 2.3, May 8, 2009 * ******************************************************************************/ @@ -491,12 +743,34 @@ struct acpi_table_uefi { struct acpi_table_header header; /* Common ACPI table header */ u8 identifier[16]; /* UUID identifier */ u16 data_offset; /* Offset of remaining data in table */ - u8 data; }; +/******************************************************************************* + * + * WAET - Windows ACPI Emulated devices Table + * Version 1 + * + * Conforms to "Windows ACPI Emulated Devices Table", version 1.0, April 6, 2009 + * + ******************************************************************************/ + +struct acpi_table_waet { + struct acpi_table_header header; /* Common ACPI table header */ + u32 flags; +}; + +/* Masks for Flags field above */ + +#define ACPI_WAET_RTC_NO_ACK (1) /* RTC requires no int acknowledge */ +#define ACPI_WAET_TIMER_ONE_READ (1<<1) /* PM timer requires only one read */ + /******************************************************************************* * * WDAT - Watchdog Action Table + * Version 1 + * + * Conforms to "Hardware Watchdog Timers Design Specification", + * Copyright 2006 Microsoft Corporation. 
* ******************************************************************************/ @@ -516,10 +790,20 @@ struct acpi_table_wdat { u32 entries; /* Number of watchdog entries that follow */ }; +/* Masks for Flags field above */ + +#define ACPI_WDAT_ENABLED (1) +#define ACPI_WDAT_STOPPED 0x80 + /* WDAT Instruction Entries (actions) */ struct acpi_wdat_entry { - struct acpi_whea_header whea_header; /* Common header for WHEA tables */ + u8 action; + u8 instruction; + u16 reserved; + struct acpi_generic_address register_region; + u32 value; /* Value used with Read/Write register */ + u32 mask; /* Bitmask required for this register instruction */ }; /* Values for Action field above */ @@ -556,28 +840,27 @@ enum acpi_wdat_instructions { /******************************************************************************* * * WDRT - Watchdog Resource Table + * Version 1 + * + * Conforms to "Watchdog Timer Hardware Requirements for Windows Server 2003", + * Version 1.01, August 28, 2006 * ******************************************************************************/ struct acpi_table_wdrt { struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u8 pci_segment; /* PCI Segment number */ + struct acpi_generic_address control_register; + struct acpi_generic_address count_register; + u16 pci_device_id; + u16 pci_vendor_id; u8 pci_bus; /* PCI Bus number */ u8 pci_device; /* PCI Device number */ u8 pci_function; /* PCI Function number */ - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved[3]; - u32 entries; /* Number of watchdog entries that follow */ + u8 pci_segment; /* PCI Segment number */ + u16 max_count; /* Maximum counter value supported */ + u8 units; }; -/* Flags */ - -#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ - /* Reset to default packing */ #pragma pack() -- cgit v1.2.3 From c276e3884163355464a76e60ed9e272b52b4acc2 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 27 Jul 2009 14:55:02 +0800 Subject: ACPICA: Update definitions for HEST table Eliminate duplicated code in disassembler. Shorten identifiers that were too long. 
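[ For illustration, a minimal sketch of a consumer reading the shortened AER identifiers; the helper, its pr_info() output, and the assumption of an already-mapped table are hypothetical -- only the struct layout and the new field names come from the diff below. ]

#include <linux/kernel.h>
#include <acpi/acpi.h>	/* assumed to pull in the HEST table definitions */

/* Hypothetical helper: dump the renamed mask/severity fields of a
 * HEST PCI Express AER error source. */
static void dump_aer_common(struct acpi_hest_aer_common *aer)
{
	pr_info("AER source %02x:%02x.%x, enabled=%u\n",
		aer->bus, aer->device, aer->function, aer->enabled);
	pr_info("  uncorrectable mask/severity: %#x/%#x\n",
		aer->uncorrectable_mask, aer->uncorrectable_severity);
	pr_info("  correctable mask: %#x, advanced capabilities: %#x\n",
		aer->correctable_mask, aer->advanced_capabilities);
}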
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl1.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 0417f2abc44b..34b10c06bcfd 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -427,17 +427,17 @@ struct acpi_hest_aer_common { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 bus; u16 device; u16 function; u16 device_control; u16 reserved2; - u32 uncorrectable_error_mask; - u32 uncorrectable_error_severity; - u32 correctable_error_mask; - u32 advanced_error_capabilities; + u32 uncorrectable_mask; + u32 uncorrectable_severity; + u32 correctable_mask; + u32 advanced_capabilities; }; /* Masks for HEST Flags fields */ @@ -490,7 +490,7 @@ struct acpi_hest_ia_machine_check { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u64 global_capability_data; u64 global_control_data; @@ -505,7 +505,7 @@ struct acpi_table_hest_ia_corrected { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; struct acpi_hest_notify notify; u8 num_hardware_banks; @@ -517,7 +517,7 @@ struct acpi_table_hest_ia_corrected { struct acpi_hest_ia_nmi { struct acpi_hest_header header; u32 reserved; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 max_raw_data_length; }; @@ -544,9 +544,9 @@ struct acpi_hest_aer { struct acpi_hest_aer_bridge { struct acpi_hest_header header; struct acpi_hest_aer_common aer; - u32 second_uncorrectable_error_mask; - u32 second_uncorrectable_error_severity; - u32 second_advanced_capabilities; + u32 uncorrectable_mask2; + u32 uncorrectable_severity2; + u32 advanced_capabilities2; }; /* 9: Generic Hardware Error Source */ @@ -556,7 +556,7 @@ struct acpi_hest_generic { u16 related_source_id; u8 reserved; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 max_raw_data_length; struct acpi_generic_address error_status_address; -- cgit v1.2.3 From 1872bbc94b2d092ece22a8fbf1c3e81f0fba0052 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 13 Aug 2009 13:31:00 +0800 Subject: ACPICA: Fix typo for HEST ACPI table Problem with the name of one of the subtables. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 34b10c06bcfd..0b9b430b092b 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -500,7 +500,7 @@ struct acpi_hest_ia_machine_check { /* 1: IA32 Corrected Machine Check */ -struct acpi_table_hest_ia_corrected { +struct acpi_hest_ia_corrected { struct acpi_hest_header header; u16 reserved1; u8 flags; -- cgit v1.2.3 From 49ae80c9944401222e47108883c486b5a5a24006 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 13 Aug 2009 13:43:12 +0800 Subject: ACPICA: Update version to 20090730 Version 20090730. 
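[ Since ACPI_CA_VERSION is in YYYYMMDD format, external code can compare releases numerically; a hedged sketch -- the HAVE_* macro is made up, only the define and its new value come from the diff below. ]

#include <acpi/acpixf.h>

#if ACPI_CA_VERSION >= 0x20090730
/* first ACPICA release carrying the shortened HEST identifiers above */
#define HAVE_SHORT_HEST_IDENTIFIERS 1
#endif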
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 063e577e791e..f3b358b7432f 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090625 +#define ACPI_CA_VERSION 0x20090730 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3 From a192a9580bcc41692be1f36b77c3b681827f566a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Jul 2009 16:45:54 -0400 Subject: ACPI: Move definition of PREFIX from acpi_bus.h to internal.h Linux/ACPI core files using internal.h all use PREFIX "ACPI: ", however, not all ACPI drivers use/want it -- and they should not have to #undef PREFIX to define their own. Add GPL comment to internal.h while we are there. This does not change any actual console output, aside from a whitespace fix. Signed-off-by: Len Brown --- arch/x86/pci/mmconfig-shared.c | 2 ++ drivers/acpi/ac.c | 2 ++ drivers/acpi/battery.c | 2 ++ drivers/acpi/blacklist.c | 2 ++ drivers/acpi/button.c | 2 ++ drivers/acpi/cm_sbs.c | 2 ++ drivers/acpi/container.c | 2 ++ drivers/acpi/dock.c | 2 ++ drivers/acpi/ec.c | 1 - drivers/acpi/event.c | 2 ++ drivers/acpi/fan.c | 2 ++ drivers/acpi/glue.c | 2 ++ drivers/acpi/internal.h | 22 +++++++++++++++++++++- drivers/acpi/numa.c | 2 ++ drivers/acpi/pci_irq.c | 2 ++ drivers/acpi/pci_link.c | 2 ++ drivers/acpi/pci_root.c | 2 ++ drivers/acpi/power.c | 2 ++ drivers/acpi/processor_core.c | 2 ++ drivers/acpi/processor_idle.c | 2 ++ drivers/acpi/processor_perflib.c | 2 ++ drivers/acpi/processor_thermal.c | 2 ++ drivers/acpi/processor_throttling.c | 2 ++ drivers/acpi/sbs.c | 2 ++ drivers/acpi/sbshc.c | 2 ++ drivers/acpi/system.c | 2 ++ drivers/acpi/thermal.c | 2 ++ drivers/acpi/utils.c | 2 ++ drivers/acpi/video.c | 2 ++ drivers/acpi/video_detect.c | 2 ++ drivers/pci/dmar.c | 3 +-- drivers/platform/x86/fujitsu-laptop.c | 4 ++-- drivers/platform/x86/wmi.c | 1 - include/acpi/acpi_bus.h | 2 -- 34 files changed, 80 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 712443ec6d43..81d3466765ca 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -18,6 +18,8 @@ #include #include +#define PREFIX "ACPI: " + /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) #define MMCONFIG_APER_MAX (256 * 1024*1024) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 0df8fcb687d6..98b9690b0159 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -37,6 +37,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_AC_CLASS "ac_adapter" #define ACPI_AC_DEVICE_NAME "AC Adapter" #define ACPI_AC_FILE_STATE "state" diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 58b4517ce712..f8c3d1bb6969 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -45,6 +45,8 @@ #include #endif +#define PREFIX "ACPI: " + #define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index f6baa77deefb..19152ea2b104 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -34,6 +34,8 @@ #include #include +#include "internal.h" + enum acpi_blacklist_predicates { all_versions, less_than_or_equal, diff --git
a/drivers/acpi/button.c b/drivers/acpi/button.c index 9195deba9d94..d295bdccc09c 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -33,6 +33,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_BUTTON_CLASS "button" #define ACPI_BUTTON_FILE_INFO "info" #define ACPI_BUTTON_FILE_STATE "state" diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c index 332fe4b21708..6c9ee68e46fb 100644 --- a/drivers/acpi/cm_sbs.c +++ b/drivers/acpi/cm_sbs.c @@ -28,6 +28,8 @@ #include #include +#define PREFIX "ACPI: " + ACPI_MODULE_NAME("cm_sbs"); #define ACPI_AC_CLASS "ac_adapter" #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index fe0cdf83641a..5f2c3c00a315 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -35,6 +35,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_CONTAINER_DEVICE_NAME "ACPI container device" #define ACPI_CONTAINER_CLASS "container" diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index efb959d6c8a9..9a855669ff12 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -33,6 +33,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_DOCK_DRIVER_DESCRIPTION "ACPI Dock Station Driver" ACPI_MODULE_NAME("dock"); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 391f331674c7..5180f0f1dd02 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -47,7 +47,6 @@ #define ACPI_EC_DEVICE_NAME "Embedded Controller" #define ACPI_EC_FILE_INFO "info" -#undef PREFIX #define PREFIX "ACPI: EC: " /* EC status register */ diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index aeb7e5fb4a04..c511071bfd79 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -14,6 +14,8 @@ #include #include +#include "internal.h" + #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("event"); diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 53698ea08371..f419849a0d3f 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -34,6 +34,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_FAN_CLASS "fan" #define ACPI_FAN_FILE_STATE "state" diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index a8a5c29958c8..dc36a448de43 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -12,6 +12,8 @@ #include #include +#include "internal.h" + #define ACPI_GLUE_DEBUG 0 #if ACPI_GLUE_DEBUG #define DBG(x...) printk(PREFIX x) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 11a69b53004e..074cf8682d52 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -1,4 +1,24 @@ -/* For use by Linux/ACPI infrastructure, not drivers */ +/* + * acpi/internal.h + * For use by Linux/ACPI infrastructure, not drivers + * + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define PREFIX "ACPI: " int init_acpi_device_notify(void); int acpi_scan_init(void); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index d440ccd27d91..202dd0c976a3 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -30,6 +30,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_NUMA 0x80000000 #define _COMPONENT ACPI_NUMA ACPI_MODULE_NAME("numa"); diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index b794eb88ab90..843699ed93f2 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -40,6 +40,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_irq"); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 16e0f9d3d17c..394ae89409c2 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -43,6 +43,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_link"); #define ACPI_PCI_LINK_CLASS "pci_irq_routing" diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 55b5b90c2a44..dee916707a7d 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -36,6 +36,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_root"); #define ACPI_PCI_ROOT_CLASS "pci_bridge" diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index d74365d4a6e7..e86603f37dee 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -44,6 +44,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_POWER_COMPONENT ACPI_MODULE_NAME("power"); #define ACPI_POWER_CLASS "power_resource" diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 2cc4b3033872..b4a1ab297e7b 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -59,6 +59,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define ACPI_PROCESSOR_DEVICE_NAME "Processor" #define ACPI_PROCESSOR_FILE_INFO "info" diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 66393d5c4c7c..22aab1fc9b45 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -60,6 +60,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_idle"); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 60e543d3234e..11088cf10319 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -39,6 +39,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define ACPI_PROCESSOR_FILE_PERFORMANCE "performance" #define _COMPONENT ACPI_PROCESSOR_COMPONENT diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 31adda1099e0..3e3181c0efc3 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -40,6 +40,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_thermal"); diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index ae39797aab55..b366b9c13d4d 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -41,6 +41,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT 
ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_throttling"); diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 4b214b74ebaa..52b9db8afc20 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -46,6 +46,8 @@ #include "sbshc.h" +#define PREFIX "ACPI: " + #define ACPI_SBS_CLASS "sbs" #define ACPI_AC_CLASS "ac_adapter" #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 0619734895b2..d9339806df45 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -15,6 +15,8 @@ #include #include "sbshc.h" +#define PREFIX "ACPI: " + #define ACPI_SMB_HC_CLASS "smbus_host_controller" #define ACPI_SMB_HC_DEVICE_NAME "ACPI SMBus HC" diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 9c61ab2177cf..d11282975f35 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -31,6 +31,8 @@ #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("system"); diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 564ea1424288..65f67815902a 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -47,6 +47,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_THERMAL_CLASS "thermal_zone" #define ACPI_THERMAL_DEVICE_NAME "Thermal Zone" #define ACPI_THERMAL_FILE_STATE "state" diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index f844941089bb..811fec10462b 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -30,6 +30,8 @@ #include #include +#include "internal.h" + #define _COMPONENT ACPI_BUS_COMPONENT ACPI_MODULE_NAME("utils"); diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 8851315ce858..a0fa3946b507 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -44,6 +44,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_VIDEO_CLASS "video" #define ACPI_VIDEO_BUS_NAME "Video Bus" #define ACPI_VIDEO_DEVICE_NAME "Video Device" diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 7cd2b63435ea..7032f25da9b5 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -38,6 +38,8 @@ #include #include +#define PREFIX "ACPI: " + ACPI_MODULE_NAME("video"); #define _COMPONENT ACPI_VIDEO_COMPONENT diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 7b287cb38b7a..998f02d2ba42 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -34,8 +34,7 @@ #include #include -#undef PREFIX -#define PREFIX "DMAR:" +#define PREFIX "DMAR: " /* No locks are needed as DMA remapping hardware unit * list is constructed at boot time and hotplug of diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 218b9a16ac3f..eabddc9c192b 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -700,7 +700,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) goto end; } - printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + printk(KERN_INFO "ACPI: %s [%s] (%s)\n", acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); @@ -874,7 +874,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) goto end; } - printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + printk(KERN_INFO "ACPI: %s [%s] (%s)\n", acpi_device_name(device), acpi_device_bid(device), !device->power.state ? 
"on" : "off"); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index f215a5919192..177f8d767df4 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -42,7 +42,6 @@ MODULE_LICENSE("GPL"); #define ACPI_WMI_CLASS "wmi" -#undef PREFIX #define PREFIX "ACPI: WMI: " static DEFINE_MUTEX(wmi_data_lock); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..f485107ddc43 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -30,8 +30,6 @@ #include -#define PREFIX "ACPI: " - /* TBD: Make dynamic */ #define ACPI_MAX_HANDLES 10 struct acpi_handle_list { -- cgit v1.2.3 From e55a5999ffcf72dc4d43d73618957964cb87065a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 28 Jul 2009 17:41:53 +0800 Subject: ACPI: Handle CONFIG_ACPI=n better from linux/acpi.h linux/acpi.h is the top level header for interfacing with the ACPI sub-system, so acpi_disabled should be up there instead of down in asm/acpi.h -- particularly since asm/acpi.h doesn't exist for all architectures. Same story for acpi_table_parse(), which is a top-level API to Linux/ACPI. This is necessary for building some code that used to always depend on CONFIG_ACPI=y, but will soon also need to build with CONFIG_ACPI=n. Signed-off-by: Feng Tang Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 1 - include/linux/acpi.h | 11 ++++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 20d1465a2ab0..4518dc500903 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) #else /* !CONFIG_ACPI */ -#define acpi_disabled 1 #define acpi_lapic 0 #define acpi_ioapic 0 static inline void acpi_noirq_set(void) { } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 34321cfffeab..3fce811bf9ac 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -292,7 +292,10 @@ void __init acpi_s4_no_nvs(void); extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); extern void acpi_early_init(void); -#else /* CONFIG_ACPI */ +#else /* !CONFIG_ACPI */ + +#define acpi_disabled 1 + static inline void acpi_early_init(void) { } static inline int early_acpi_boot_init(void) @@ -331,5 +334,11 @@ static inline int acpi_check_mem_region(resource_size_t start, return 0; } +struct acpi_table_header; +static inline int acpi_table_parse(char *id, + int (*handler)(struct acpi_table_header *)) +{ + return -1; +} #endif /* !CONFIG_ACPI */ #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 117a9ac777f8034d4675b821172d2ff71f6ec47a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:10:24 -0400 Subject: SFI: create linux/sfi.h include/linux/include/sfi.h defines everything that customers of SFI need to know in order to use the SFI suport in the kernel. The primary API is sfi_table_parse(), where a driver or another part of the kernel can supply a handler to parse the named table. sfi.h also includes the currently defined table signatures and table formats. 
Signed-off-by: Feng Tang Signed-off-by: Len Brown --- include/linux/sfi.h | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 include/linux/sfi.h (limited to 'include') diff --git a/include/linux/sfi.h b/include/linux/sfi.h new file mode 100644 index 000000000000..9a6f7607174e --- /dev/null +++ b/include/linux/sfi.h @@ -0,0 +1,206 @@ +/* sfi.h Simple Firmware Interface */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef _LINUX_SFI_H +#define _LINUX_SFI_H + +/* Table signatures reserved by the SFI specification */ +#define SFI_SIG_SYST "SYST" +#define SFI_SIG_FREQ "FREQ" +#define SFI_SIG_IDLE "IDLE" +#define SFI_SIG_CPUS "CPUS" +#define SFI_SIG_MTMR "MTMR" +#define SFI_SIG_MRTC "MRTC" +#define SFI_SIG_MMAP "MMAP" +#define SFI_SIG_APIC "APIC" +#define SFI_SIG_XSDT "XSDT" +#define SFI_SIG_WAKE "WAKE" +#define SFI_SIG_SPIB "SPIB" +#define SFI_SIG_I2CB "I2CB" +#define SFI_SIG_GPEM "GPEM" + +#define SFI_SIGNATURE_SIZE 4 +#define SFI_OEM_ID_SIZE 6 +#define SFI_OEM_TABLE_ID_SIZE 8 + +#define SFI_SYST_SEARCH_BEGIN 0x000E0000 +#define SFI_SYST_SEARCH_END 0x000FFFFF + +#define SFI_GET_NUM_ENTRIES(ptable, entry_type) \ + ((ptable->header.len - sizeof(struct sfi_table_header)) / \ + (sizeof(entry_type))) +/* + * Table structures must be byte-packed to match the SFI specification, + * as they are provided by the BIOS. + */ +struct sfi_table_header { + char sig[SFI_SIGNATURE_SIZE]; + u32 len; + u8 rev; + u8 csum; + char oem_id[SFI_OEM_ID_SIZE]; + char oem_table_id[SFI_OEM_TABLE_ID_SIZE]; +} __packed; + +struct sfi_table_simple { + struct sfi_table_header header; + u64 pentry[1]; +} __packed; + +/* Comply with UEFI spec 2.1 */ +struct sfi_mem_entry { + u32 type; + u64 phys_start; + u64 virt_start; + u64 pages; + u64 attrib; +} __packed; + +struct sfi_cpu_table_entry { + u32 apic_id; +} __packed; + +struct sfi_cstate_table_entry { + u32 hint; /* MWAIT hint */ + u32 latency; /* latency in ms */ +} __packed; + +struct sfi_apic_table_entry { + u64 phys_addr; /* phy base addr for APIC reg */ +} __packed; + +struct sfi_freq_table_entry { + u32 freq_mhz; /* in MHZ */ + u32 latency; /* transition latency in ms */ + u32 ctrl_val; /* value to write to PERF_CTL */ +} __packed; + +struct sfi_wake_table_entry { + u64 phys_addr; /* pointer to where the wake vector locates */ +} __packed; + +struct sfi_timer_table_entry { + u64 phys_addr; /* phy base addr for the timer */ + u32 freq_hz; /* in HZ */ + u32 irq; +} __packed; + +struct sfi_rtc_table_entry { + u64 phys_addr; /* phy base addr for the RTC */ + u32 irq; +} __packed; + +struct sfi_spi_table_entry { + u16 host_num; /* attached to host 0, 1...*/ + u16 cs; /* chip select */ + u16 irq_info; + char name[16]; + u8 dev_info[10]; +} __packed; + +struct sfi_i2c_table_entry { + u16 host_num; + u16 addr; /* slave addr */ + u16 irq_info; + char name[16]; + u8 dev_info[10]; +} __packed; + +struct sfi_gpe_table_entry { + u16 logical_id; /* logical id */ + u16 phys_id; /* physical GPE id */ +} __packed; + + +typedef int (*sfi_table_handler) (struct sfi_table_header *table); + +#ifdef CONFIG_SFI +extern void __init sfi_init(void); +extern int __init sfi_platform_init(void); +extern void __init sfi_init_late(void); +extern int sfi_table_parse(char *signature, char *oem_id, char *oem_table_id, + sfi_table_handler handler); + +extern int sfi_disabled; +static inline void disable_sfi(void) +{ + sfi_disabled = 1; +} + +#else /* !CONFIG_SFI */ + +static inline void sfi_init(void) +{ +} + +static inline void sfi_init_late(void) +{ +} + +#define sfi_disabled 0 + +static inline int sfi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + sfi_table_handler handler) +{ + return -1; +} + +#endif /* !CONFIG_SFI */ + +#endif /*_LINUX_SFI_H*/ -- cgit v1.2.3 From 13e82d023c4c3f13ab1e665cbb917a7ebba8935c Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:17:53 -0400 Subject: SFI: add capability to parse ACPI tables Extend SFI to access standard ACPI tables. (eg. 
the PCI MCFG) using sfi_acpi_table_parse(). Note that this is _not_ a hybrid ACPI + SFI mode. The platform boots in either ACPI mode or SFI mode. SFI runs only with acpi_disabled=1, which can be set at build-time via CONFIG_ACPI=n, or at boot time by the failure to find ACPI platform support. So this extension simply allows SFI-platforms to re-use existing standard table formats that happen to be defined to live in ACPI envelopes. Signed-off-by: Feng Tang Signed-off-by: Len Brown --- drivers/sfi/sfi_acpi.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sfi_acpi.h | 93 +++++++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 drivers/sfi/sfi_acpi.c create mode 100644 include/linux/sfi_acpi.h (limited to 'include') diff --git a/drivers/sfi/sfi_acpi.c b/drivers/sfi/sfi_acpi.c new file mode 100644 index 000000000000..34aba30eb84b --- /dev/null +++ b/drivers/sfi/sfi_acpi.c @@ -0,0 +1,175 @@ +/* sfi_acpi.c Simple Firmware Interface - ACPI extensions */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#define KMSG_COMPONENT "SFI" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include + +#include +#include "sfi_core.h" + +/* + * SFI can access ACPI-defined tables via an optional ACPI XSDT. + * + * This allows re-use, and avoids re-definition, of standard tables. + * For example, the "MCFG" table is defined by PCI, reserved by ACPI, + * and is expected to be present many SFI-only systems. + */ + +static struct acpi_table_xsdt *xsdt_va __read_mostly; + +#define XSDT_GET_NUM_ENTRIES(ptable, entry_type) \ + ((ptable->header.length - sizeof(struct acpi_table_header)) / \ + (sizeof(entry_type))) + +static inline struct sfi_table_header *acpi_to_sfi_th( + struct acpi_table_header *th) +{ + return (struct sfi_table_header *)th; +} + +static inline struct acpi_table_header *sfi_to_acpi_th( + struct sfi_table_header *th) +{ + return (struct acpi_table_header *)th; +} + +/* + * sfi_acpi_parse_xsdt() + * + * Parse the ACPI XSDT for later access by sfi_acpi_table_parse(). + */ +static int __init sfi_acpi_parse_xsdt(struct sfi_table_header *th) +{ + struct sfi_table_key key = SFI_ANY_KEY; + int tbl_cnt, i; + void *ret; + + xsdt_va = (struct acpi_table_xsdt *)th; + tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64); + for (i = 0; i < tbl_cnt; i++) { + ret = sfi_check_table(xsdt_va->table_offset_entry[i], &key); + if (IS_ERR(ret)) { + disable_sfi(); + return -1; + } + } + + return 0; +} + +int __init sfi_acpi_init(void) +{ + struct sfi_table_key xsdt_key = { .sig = SFI_SIG_XSDT }; + + sfi_table_parse(SFI_SIG_XSDT, NULL, NULL, sfi_acpi_parse_xsdt); + + /* Only call the get_table to keep the table mapped */ + xsdt_va = (struct acpi_table_xsdt *)sfi_get_table(&xsdt_key); + return 0; +} + +static struct acpi_table_header *sfi_acpi_get_table(struct sfi_table_key *key) +{ + u32 tbl_cnt, i; + void *ret; + + tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64); + for (i = 0; i < tbl_cnt; i++) { + ret = sfi_check_table(xsdt_va->table_offset_entry[i], key); + if (!IS_ERR(ret) && ret) + return sfi_to_acpi_th(ret); + } + + return NULL; +} + +static void sfi_acpi_put_table(struct acpi_table_header *table) +{ + sfi_put_table(acpi_to_sfi_th(table)); +} + +/* + * sfi_acpi_table_parse() + * + * Find specified table in XSDT, run handler on it and return its return value + */ +int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id, + int(*handler)(struct acpi_table_header *)) +{ + struct acpi_table_header *table = NULL; + struct sfi_table_key key; + int ret = 0; + + if (sfi_disabled) + return -1; + + key.sig = signature; + key.oem_id = oem_id; + key.oem_table_id = oem_table_id; + + table = sfi_acpi_get_table(&key); + if (!table) + return -EINVAL; + + ret = handler(table); + sfi_acpi_put_table(table); + return ret; +} diff --git a/include/linux/sfi_acpi.h b/include/linux/sfi_acpi.h new file mode 100644 index 000000000000..c4a5a8cd4469 --- /dev/null +++ b/include/linux/sfi_acpi.h @@ -0,0 +1,93 @@ +/* sfi.h Simple Firmware Interface */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef _LINUX_SFI_ACPI_H +#define _LINUX_SFI_ACPI_H + +#ifdef CONFIG_SFI +#include /* struct acpi_table_header */ + +extern int sfi_acpi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + int (*handler)(struct acpi_table_header *)); + +static inline int acpi_sfi_table_parse(char *signature, + int (*handler)(struct acpi_table_header *)) +{ + if (!acpi_table_parse(signature, handler)) + return 0; + + return sfi_acpi_table_parse(signature, NULL, NULL, handler); +} +#else /* !CONFIG_SFI */ + +static inline int sfi_acpi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + int (*handler)(struct acpi_table_header *)) +{ + return -1; +} + +static inline int acpi_sfi_table_parse(char *signature, + int (*handler)(struct acpi_table_header *)) +{ + return acpi_table_parse(signature, handler); +} +#endif /* !CONFIG_SFI */ + +#endif /*_LINUX_SFI_ACPI_H*/ -- cgit v1.2.3 From 2b022e3d4bf9885f781221c59d86283a2cdfc2ed Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:48:59 +0800 Subject: timers: Add tracepoints for timer_list timers Add tracepoints which cover the timer life cycle. The tracepoints are integrated with the already existing debug_object debug points as far as possible. 
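[ A usage sketch: a throwaway module whose timer fires each of the new events once the timer trace events are enabled (typically under /sys/kernel/debug/tracing/events/timer/); every name here is hypothetical, only the tracepoint semantics come from this patch. ]

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
	/* runs between timer_expire_entry and timer_expire_exit */
	pr_info("demo timer fired\n");
}

static int __init demo_init(void)
{
	setup_timer(&demo_timer, demo_timer_fn, 0);	/* -> timer_init */
	mod_timer(&demo_timer, jiffies + HZ);		/* -> timer_start */
	return 0;
}

static void __exit demo_exit(void)
{
	del_timer_sync(&demo_timer);			/* -> timer_cancel */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");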
Based on patches from Mathieu: http://marc.info/?l=linux-kernel&m=123791201816247&w=2 and Anton: http://marc.info/?l=linux-kernel&m=124331396919301&w=2 [ tglx: Fixed timeout value in timer_start tracepoint, massaged comments and made the printk's more readable ] Signed-off-by: Xiao Guangrong Cc: Anton Blanchard Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8A9B.3040201@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 137 +++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 32 ++++++++-- 2 files changed, 165 insertions(+), 4 deletions(-) create mode 100644 include/trace/events/timer.h (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h new file mode 100644 index 000000000000..725892a93b49 --- /dev/null +++ b/include/trace/events/timer.h @@ -0,0 +1,137 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM timer + +#if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TIMER_H + +#include +#include + +/** + * timer_init - called when the timer is initialized + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_init, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_start - called when the timer is started + * @timer: pointer to struct timer_list + * @expires: the timers expiry time + */ +TRACE_EVENT(timer_start, + + TP_PROTO(struct timer_list *timer, unsigned long expires), + + TP_ARGS(timer, expires), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( unsigned long, expires ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = expires; + __entry->now = jiffies; + ), + + TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + __entry->timer, __entry->function, __entry->expires, + (long)__entry->expires - __entry->now) +); + +/** + * timer_expire_entry - called immediately before the timer callback + * @timer: pointer to struct timer_list + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(timer_expire_entry, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = jiffies; + ), + + TP_printk("timer %p: now %lu", __entry->timer, __entry->now) +); + +/** + * timer_expire_exit - called immediately after the timer callback returns + * @timer: pointer to struct timer_list + * + * When used in combination with the timer_expire_entry tracepoint we can + * determine the runtime of the timer callback function. + * + * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might + * be invalid. We solely track the pointer. 
+ */ +TRACE_EVENT(timer_expire_exit, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field(void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_cancel - called when the timer is canceled + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_cancel, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +#endif /* _TRACE_TIMER_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/timer.c b/kernel/timer.c index 8e92be654dad..a7352b00703c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -46,6 +46,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); @@ -521,6 +524,25 @@ static inline void debug_timer_activate(struct timer_list *timer) { } static inline void debug_timer_deactivate(struct timer_list *timer) { } #endif +static inline void debug_init(struct timer_list *timer) +{ + debug_timer_init(timer); + trace_timer_init(timer); +} + +static inline void +debug_activate(struct timer_list *timer, unsigned long expires) +{ + debug_timer_activate(timer); + trace_timer_start(timer, expires); +} + +static inline void debug_deactivate(struct timer_list *timer) +{ + debug_timer_deactivate(timer); + trace_timer_cancel(timer); +} + static void __init_timer(struct timer_list *timer, const char *name, struct lock_class_key *key) @@ -549,7 +571,7 @@ void init_timer_key(struct timer_list *timer, const char *name, struct lock_class_key *key) { - debug_timer_init(timer); + debug_init(timer); __init_timer(timer, name, key); } EXPORT_SYMBOL(init_timer_key); @@ -568,7 +590,7 @@ static inline void detach_timer(struct timer_list *timer, { struct list_head *entry = &timer->entry; - debug_timer_deactivate(timer); + debug_deactivate(timer); __list_del(entry->prev, entry->next); if (clear_pending) @@ -632,7 +654,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, goto out_unlock; } - debug_timer_activate(timer); + debug_activate(timer, expires); new_base = __get_cpu_var(tvec_bases); @@ -787,7 +809,7 @@ void add_timer_on(struct timer_list *timer, int cpu) BUG_ON(timer_pending(timer) || !timer->function); spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); - debug_timer_activate(timer); + debug_activate(timer, timer->expires); if (time_before(timer->expires, base->next_timer) && !tbase_get_deferrable(timer->base)) base->next_timer = timer->expires; @@ -1000,7 +1022,9 @@ static inline void __run_timers(struct tvec_base *base) */ lock_map_acquire(&lockdep_map); + trace_timer_expire_entry(timer); fn(data); + trace_timer_expire_exit(timer); lock_map_release(&lockdep_map); -- cgit v1.2.3 From c6a2a1770245f654f35f60e1458d4356680f9519 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:51:23 +0800 Subject: hrtimer: Add tracepoint for hrtimers Add tracepoints which cover the life cycle of a hrtimer. The tracepoints are integrated with the already existing debug_object debug points as far as possible. [ tglx: Fixed comments, made output consistent, easier to read and parse. Fixed output for 32bit archs which do not use the scalar representation of ktime_t. Hand current time to trace_hrtimer_expiry_entry instead of calling get_time() inside of the trace assignment.
] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B2B.5020908@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 139 +++++++++++++++++++++++++++++++++++++++++++ kernel/hrtimer.c | 40 ++++++++++--- 2 files changed, 171 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 725892a93b49..df3c07fa0cb8 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -5,6 +5,7 @@ #define _TRACE_TIMER_H #include +#include #include /** @@ -131,6 +132,144 @@ TRACE_EVENT(timer_cancel, TP_printk("timer %p", __entry->timer) ); +/** + * hrtimer_init - called when the hrtimer is initialized + * @timer: pointer to struct hrtimer + * @clockid: the hrtimers clock + * @mode: the hrtimers mode + */ +TRACE_EVENT(hrtimer_init, + + TP_PROTO(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode), + + TP_ARGS(timer, clockid, mode), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( clockid_t, clockid ) + __field( enum hrtimer_mode, mode ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->clockid = clockid; + __entry->mode = mode; + ), + + TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + __entry->clockid == CLOCK_REALTIME ? + "CLOCK_REALTIME" : "CLOCK_MONOTONIC", + __entry->mode == HRTIMER_MODE_ABS ? + "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") +); + +/** + * hrtimer_start - called when the hrtimer is started + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_start, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( s64, expires ) + __field( s64, softexpires ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = hrtimer_get_expires(timer).tv64; + __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + ), + + TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", + __entry->timer, __entry->function, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->expires }), + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->softexpires })) +); + +/** + * htimmer_expire_entry - called immediately before the hrtimer callback + * @timer: pointer to struct hrtimer + * @now: pointer to variable which contains current time of the + * timers base. + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(hrtimer_expire_entry, + + TP_PROTO(struct hrtimer *timer, ktime_t *now), + + TP_ARGS(timer, now), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( s64, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = now->tv64; + ), + + TP_printk("hrtimer %p, now %llu", __entry->timer, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->now })) + ); + +/** + * hrtimer_expire_exit - called immediately after the hrtimer callback returns + * @timer: pointer to struct hrtimer + * + * When used in combination with the hrtimer_expire_entry tracepoint we can + * determine the runtime of the callback function. 
+ */ +TRACE_EVENT(hrtimer_expire_exit, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + +/** + * hrtimer_cancel - called when the hrtimer is canceled + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_cancel, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e2f91ecc01a8..b44d1b07377b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -48,6 +48,8 @@ #include +#include + /* * The timer bases: * @@ -441,6 +443,26 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif +static inline void +debug_init(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode) +{ + debug_hrtimer_init(timer); + trace_hrtimer_init(timer, clockid, mode); +} + +static inline void debug_activate(struct hrtimer *timer) +{ + debug_hrtimer_activate(timer); + trace_hrtimer_start(timer); +} + +static inline void debug_deactivate(struct hrtimer *timer) +{ + debug_hrtimer_deactivate(timer); + trace_hrtimer_cancel(timer); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -797,7 +819,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, struct hrtimer *entry; int leftmost = 1; - debug_hrtimer_activate(timer); + debug_activate(timer); /* * Find the right place in the rbtree: @@ -883,7 +905,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, @@ -1116,7 +1138,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { - debug_hrtimer_init(timer); + debug_init(timer, clock_id, mode); __hrtimer_init(timer, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init); @@ -1140,7 +1162,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); -static void __run_hrtimer(struct hrtimer *timer) +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) { struct hrtimer_clock_base *base = timer->base; struct hrtimer_cpu_base *cpu_base = base->cpu_base; @@ -1149,7 +1171,7 @@ static void __run_hrtimer(struct hrtimer *timer) WARN_ON(!irqs_disabled()); - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); fn = timer->function; @@ -1160,7 +1182,9 @@ static void __run_hrtimer(struct hrtimer *timer) * the timer base. 
*/ spin_unlock(&cpu_base->lock); + trace_hrtimer_expire_entry(timer, now); restart = fn(timer); + trace_hrtimer_expire_exit(timer); spin_lock(&cpu_base->lock); /* @@ -1271,7 +1295,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) break; } - __run_hrtimer(timer); + __run_hrtimer(timer, &basenow); } base++; } @@ -1393,7 +1417,7 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - __run_hrtimer(timer); + __run_hrtimer(timer, &base->softirq_time); } spin_unlock(&cpu_base->lock); } @@ -1569,7 +1593,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); /* * Mark it as STATE_MIGRATE not INACTIVE otherwise the -- cgit v1.2.3 From 3f0a525ebf4b8ef041a332bbe4a73aee94bb064b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:52:30 +0800 Subject: itimers: Add tracepoints for itimer Add tracepoints for all itimer variants: ITIMER_REAL, ITIMER_VIRTUAL and ITIMER_PROF. [ tglx: Fixed comments and made the output more readable, parseable and consistent. Replaced pid_vnr by pid_nr because the hrtimer callback can happen in any namespace ] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B6E.2010109@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 66 ++++++++++++++++++++++++++++++++++++++++++++ kernel/itimer.c | 5 ++++ kernel/posix-cpu-timers.c | 7 ++++- 3 files changed, 77 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index df3c07fa0cb8..1844c48d640e 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -270,6 +270,72 @@ TRACE_EVENT(hrtimer_cancel, TP_printk("hrtimer %p", __entry->timer) ); +/** + * itimer_state - called when itimer is started or canceled + * @which: name of the interval timer + * @value: the itimers value, itimer is canceled if value->it_value is + * zero, otherwise it is started + * @expires: the itimers expiry time + */ +TRACE_EVENT(itimer_state, + + TP_PROTO(int which, const struct itimerval *const value, + cputime_t expires), + + TP_ARGS(which, value, expires), + + TP_STRUCT__entry( + __field( int, which ) + __field( cputime_t, expires ) + __field( long, value_sec ) + __field( long, value_usec ) + __field( long, interval_sec ) + __field( long, interval_usec ) + ), + + TP_fast_assign( + __entry->which = which; + __entry->expires = expires; + __entry->value_sec = value->it_value.tv_sec; + __entry->value_usec = value->it_value.tv_usec; + __entry->interval_sec = value->it_interval.tv_sec; + __entry->interval_usec = value->it_interval.tv_usec; + ), + + TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + __entry->which, __entry->expires, + __entry->value_sec, __entry->value_usec, + __entry->interval_sec, __entry->interval_usec) +); + +/** + * itimer_expire - called when itimer expires + * @which: type of the interval timer + * @pid: pid of the process which owns the timer + * @now: current time, used to calculate the latency of itimer + */ +TRACE_EVENT(itimer_expire, + + TP_PROTO(int which, struct pid *pid, cputime_t now), + + TP_ARGS(which, pid, now), + + TP_STRUCT__entry( + __field( int , which ) + __field( pid_t, pid ) + __field( 
cputime_t, now )
+	),
+
+	TP_fast_assign(
+		__entry->which = which;
+		__entry->now = now;
+		__entry->pid = pid_nr(pid);
+	),
+
+	TP_printk("which %d, pid %d, now %lu", __entry->which,
+		  (int) __entry->pid, __entry->now)
+);
+
 #endif /*  _TRACE_TIMER_H */
 
 /* This part must be outside protection */
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 8078a32d3b10..b03451ede528 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <trace/events/timer.h>
 
 #include
@@ -122,6 +123,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 	struct signal_struct *sig =
 		container_of(timer, struct signal_struct, real_timer);
 
+	trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
 	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
 
 	return HRTIMER_NORESTART;
@@ -166,6 +168,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 	}
 	it->expires = nval;
 	it->incr = ninterval;
+	trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
+			   ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
 
 	spin_unlock_irq(&tsk->sighand->siglock);
@@ -217,6 +221,7 @@ again:
 		} else
 			tsk->signal->it_real_incr.tv64 = 0;
 
+		trace_itimer_state(ITIMER_REAL, value, 0);
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 12161f74744e..5c9dc228747b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <trace/events/timer.h>
 
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
@@ -1090,9 +1091,13 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 				cputime_one_jiffy);
 			it->error -= onecputick;
 		}
-	} else
+	} else {
 		it->expires = cputime_zero;
+	}
 
+	trace_itimer_expire(signo == SIGPROF ?
+			    ITIMER_PROF : ITIMER_VIRTUAL,
+			    tsk->signal->leader_pid, cur_time);
 	__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 }
-- 
cgit v1.2.3

From 138d15692bf76841f252d4b836a535cf5f9154e9 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy
Date: Fri, 28 Aug 2009 23:29:38 +0400
Subject: ACPICA: Don't switch task when not allowed

Signed-off-by: Alexey Starikovskiy
Signed-off-by: Len Brown
---
 include/acpi/platform/aclinux.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'include')

diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index fcb8e4b159b1..9d7febde10a1 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -149,10 +149,10 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
 #define ACPI_FREE(a) kfree(a)
 
 /* Used within ACPICA to show where it is safe to preempt execution */
-
+#include <linux/hardirq.h>
 #define ACPI_PREEMPTION_POINT() \
 	do { \
-		if (!irqs_disabled()) \
+		if (!in_atomic_preempt_off()) \
 			cond_resched(); \
 	} while (0)
-- 
cgit v1.2.3

From ad283ea4a3ce82cda2efe33163748a397b31b1eb Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Sat, 29 Aug 2009 19:09:26 -0700
Subject: async_tx: add sum check flags

Replace the flat zero_sum_result with a collection of flags to contain
the P (xor) zero-sum result, and the soon to be utilized Q (raid6 reed
solomon syndrome) zero-sum result. Use the SUM_CHECK_ namespace instead
of DMA_ since these flags will be used on non-dma-zero-sum enabled
platforms.
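As an illustration (an editorial sketch, not part of this patch): a client of
the new interface tests the P and Q results independently instead of testing a
flat zero/non-zero word. The helper below is hypothetical and assumes only the
SUM_CHECK_* definitions this patch adds to dmaengine.h:

	/* sketch: report the outcome of a parity-validate operation */
	static void report_sum_check(enum sum_check_flags res)
	{
		if (res & SUM_CHECK_P_RESULT)
			pr_err("P (xor) parity mismatch\n");
		if (res & SUM_CHECK_Q_RESULT)
			pr_err("Q (reed-solomon) parity mismatch\n");
		if (!res)
			pr_debug("parity validated\n");
	}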
Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- arch/arm/include/asm/hardware/iop3xx-adma.h | 5 +++-- arch/arm/mach-iop13xx/include/mach/adma.h | 12 +++++++----- crypto/async_tx/async_xor.c | 4 ++-- drivers/md/raid5.c | 2 +- drivers/md/raid5.h | 5 +++-- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 21 ++++++++++++++++++++- 7 files changed, 37 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 83e6ba338e2c..26eefea02314 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->src[0] = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop3xx_desc_aau *hw_desc = desc->hw_desc; struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err; + return desc_ctrl.zero_result_err << SUM_CHECK_P; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 5722e86f2174..1cd31df8924d 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h @@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->block_fill_data = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; + enum sum_check_flags flags; BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); - if (desc_ctrl.pq_xfer_en) - return byte_count.zero_result_err_q; - else - return byte_count.zero_result_err; + flags = byte_count.zero_result_err_q << SUM_CHECK_Q; + flags |= byte_count.zero_result_err << SUM_CHECK_P; + + return flags; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 1e96c4df7061..78fb7780272a 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -246,7 +246,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) */ struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit) { struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, @@ -304,7 +304,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, async_tx_quiesce(&tx); - *result = page_is_zero(dest, offset, len) ? 
0 : 1; + *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; async_tx_sync_epilog(submit); submit->flags = flags_orig; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7727954cf726..1f2a266f3cf7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2590,7 +2590,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, * we are done. Otherwise update the mismatch count and repair * parity if !MD_RECOVERY_CHECK */ - if (sh->ops.zero_sum_result == 0) + if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) /* parity is correct (on disc, * not in buffer any more) */ diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index e7baabffee86..75f2c6c4cf90 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -2,6 +2,7 @@ #define _RAID5_H #include +#include /* * @@ -215,8 +216,8 @@ struct stripe_head { * @target - STRIPE_OP_COMPUTE_BLK target */ struct stripe_operations { - int target; - u32 zero_sum_result; + int target; + enum sum_check_flags zero_sum_result; } ops; struct r5dev { struct bio req; diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 00cfb637ddf2..3d21a2517518 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -148,7 +148,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6768727d00d7..02447afcebad 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -86,6 +86,25 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), }; +/** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + /** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h @@ -245,7 +264,7 @@ struct dma_device { unsigned int src_cnt, size_t len, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); -- cgit v1.2.3 From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:19:02 -0700 Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx We currently walk the parent chain when waiting for a given tx to complete however this walk may race with the driver cleanup routine. The routines in async_raid6_recov.c may fall back to the synchronous path at any point so we need to be prepared to call async_tx_quiesce() (which calls dma_wait_for_async_tx). 
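For reference, the synchronous-fallback pattern that has to be safe here looks
roughly like the sketch below (editorial illustration, not code from this
patch; it assumes only the async_submit_ctl fields and helpers used elsewhere
in this series):

	/* sketch: wait out any in-flight dependency, then use the CPU */
	static void sync_fallback(struct async_submit_ctl *submit)
	{
		/* may spin in dma_wait_for_async_tx(); with this patch that
		 * wait polls the initial descriptor's cookie instead of
		 * walking ->parent
		 */
		async_tx_quiesce(&submit->depend_tx);

		/* ... perform the operation synchronously ... */

		async_tx_sync_epilog(submit);	/* run completion callback */
	}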
To remove the ->parent walk we guarantee that every time a dependency is attached ->issue_pending() is invoked, then we can simply poll the initial descriptor until completion. This also allows for a lighter weight 'issue pending' implementation as there is no longer a requirement to iterate through all the channels' ->issue_pending() routines as long as operations have been submitted in an ordered chain. async_tx_issue_pending() is added for this case. Signed-off-by: Dan Williams --- crypto/async_tx/async_tx.c | 13 +++++++------ drivers/dma/dmaengine.c | 45 ++++++++++----------------------------------- include/linux/async_tx.h | 23 +++++++++++++++++++++++ 3 files changed, 40 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 6e37ad3f4417..60615fedcf5e 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -77,8 +77,8 @@ static void async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *tx) { - struct dma_chan *chan; - struct dma_device *device; + struct dma_chan *chan = depend_tx->chan; + struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; /* first check to see if we can still append to depend_tx */ @@ -90,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, } spin_unlock_bh(&depend_tx->lock); - if (!intr_tx) + /* attached dependency, flush the parent channel */ + if (!intr_tx) { + device->device_issue_pending(chan); return; - - chan = depend_tx->chan; - device = chan->device; + } /* see if we can schedule an interrupt * otherwise poll for completion @@ -128,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, intr_tx->tx_submit(intr_tx); async_tx_ack(intr_tx); } + device->device_issue_pending(chan); } else { if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) panic("%s: DMA_ERROR waiting for depend_tx\n", diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6781e8f3c064..e002e0e0d055 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -934,49 +934,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init); /* dma_wait_for_async_tx - spin wait for a transaction to complete * @tx: in-flight transaction to wait on - * - * This routine assumes that tx was obtained from a call to async_memcpy, - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped - * and submitted). Walking the parent chain is only meant to cover for DMA - * drivers that do not implement the DMA_INTERRUPT capability and may race with - * the driver's descriptor cleanup routine. 
*/ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { - enum dma_status status; - struct dma_async_tx_descriptor *iter; - struct dma_async_tx_descriptor *parent; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); if (!tx) return DMA_SUCCESS; - WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" - " %s\n", __func__, dma_chan_name(tx->chan)); - - /* poll through the dependency chain, return when tx is complete */ - do { - iter = tx; - - /* find the root of the unsubmitted dependency chain */ - do { - parent = iter->parent; - if (!parent) - break; - else - iter = parent; - } while (parent); - - /* there is a small window for ->parent == NULL and - * ->cookie == -EBUSY - */ - while (iter->cookie == -EBUSY) - cpu_relax(); - - status = dma_sync_wait(iter->chan, iter->cookie); - } while (status == DMA_IN_PROGRESS || (iter != tx)); - - return status; + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); } EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 3d21a2517518..12a2efcbd565 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -83,6 +83,24 @@ struct async_submit_ctl { #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -98,6 +116,11 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * async_tx_find_channel(struct async_submit_ctl *submit, enum dma_transaction_type tx_type, struct page **dst, -- cgit v1.2.3 From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:36 -0700 Subject: async_tx: add support for asynchronous GF multiplication [ Based on an original patch by Yuri Tikhonov ] This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API: async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources. async_syndrome_val() validates the given source buffers against known P and Q values. When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previous generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. 
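Recall that in the GF(2^8) arithmetic used here addition is xor, so a source
that enters a sum twice cancels out (x + x = 0), and a {00} coefficient
multiplies a source out of the Q sum entirely; the worked example below
depends on both properties.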
For example to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken: p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) p' = p + q + q + src4 = p + src4 q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 Note: 4 is the minimum acceptable maxpq otherwise we punt to synchronous-software path. The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients. Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag. Note2: The api supports disabling the generation of P when generating Q, this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source. Cc: H. Peter Anvin Cc: David Woodhouse Signed-off-by: Yuri Tikhonov Signed-off-by: Ilya Yanok Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 3 + arch/arm/mach-iop13xx/setup.c | 2 +- crypto/async_tx/Kconfig | 4 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_pq.c | 388 ++++++++++++++++++++++++++++++++++ crypto/async_tx/async_xor.c | 2 +- drivers/dma/dmaengine.c | 4 + drivers/dma/iop-adma.c | 2 +- include/linux/async_tx.h | 9 + include/linux/dmaengine.h | 87 +++++++- 10 files changed, 493 insertions(+), 9 deletions(-) create mode 100644 crypto/async_tx/async_pq.c (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 6b15e488c0e7..0e48e054d69a 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -64,6 +64,9 @@ xor - xor a series of source buffers and write the result to a xor_val - xor a series of source buffers and set a flag if the result is zero. 
The implementation attempts to prevent writes to memory +pq - generate the p+q (raid6 syndrome) from a series of source buffers +pq_val - validate that a p and or q buffer are in sync with a given series of + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 9800228b71d3..2e7ca0d75f8a 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -506,7 +506,7 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); - dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); + dma_cap_set(DMA_PQ, plat_data->cap_mask); dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); break; diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb39145986..cb6d7314f198 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -14,3 +14,7 @@ config ASYNC_MEMSET tristate select ASYNC_CORE +config ASYNC_PQ + tristate + select ASYNC_CORE + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d52fbc..1b9926588259 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o +obj-$(CONFIG_ASYNC_PQ) += async_pq.o diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c new file mode 100644 index 000000000000..108b21efb499 --- /dev/null +++ b/crypto/async_tx/async_pq.c @@ -0,0 +1,388 @@ +/* + * Copyright(c) 2007 Yuri Tikhonov + * Copyright(c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. 
+ */ +#include +#include +#include +#include +#include + +/** + * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + */ +static struct page *scribble; + +static bool is_raid6_zero_block(struct page *p) +{ + return p == (void *) raid6_empty_zero_page; +} + +/* the struct page *blocks[] parameter passed to async_gen_syndrome() + * and async_syndrome_val() contains the 'P' destination address at + * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] + * + * note: these are macros as they are used as lvalues + */ +#define P(b, d) (b[d-2]) +#define Q(b, d) (b[d-1]) + +/** + * do_async_gen_syndrome - asynchronously calculate P and/or Q + */ +static __async_inline struct dma_async_tx_descriptor * +do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, + const unsigned char *scfs, unsigned int offset, int disks, + size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct dma_device *dma = chan->device; + enum dma_ctrl_flags dma_flags = 0; + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + dma_async_tx_callback cb_param_orig = submit->cb_param; + int src_cnt = disks - 2; + unsigned char coefs[src_cnt]; + unsigned short pq_src_cnt; + dma_addr_t dma_dest[2]; + int src_off = 0; + int idx; + int i; + + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ + if (P(blocks, disks)) + dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (Q(blocks, disks)) + dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + for (i = 0, idx = 0; i < src_cnt; i++) { + if (is_raid6_zero_block(blocks[i])) + continue; + dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, + DMA_TO_DEVICE); + coefs[idx] = scfs[i]; + idx++; + } + src_cnt = idx; + + while (src_cnt > 0) { + submit->flags = flags_orig; + pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); + /* if we are submitting additional pqs, leave the chain open, + * clear the callback parameters, and leave the destination + * buffers mapped + */ + if (src_cnt > pq_src_cnt) { + submit->flags &= ~ASYNC_TX_ACK; + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = NULL; + submit->cb_param = NULL; + } else { + dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + if (cb_fn_orig) + dma_flags |= DMA_PREP_INTERRUPT; + } + + /* Since we have clobbered the src_list we are committed + * to doing this asynchronously. 
Drivers force forward + * progress in case they can not provide a descriptor + */ + for (;;) { + tx = dma->device_prep_dma_pq(chan, dma_dest, + &dma_src[src_off], + pq_src_cnt, + &coefs[src_off], len, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; + + /* drop completed sources */ + src_cnt -= pq_src_cnt; + src_off += pq_src_cnt; + + dma_flags |= DMA_PREP_CONTINUE; + } + + return tx; +} + +/** + * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome + */ +static void +do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + void **srcs; + int i; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) blocks; + + for (i = 0; i < disks; i++) { + if (is_raid6_zero_block(blocks[i])) { + BUG_ON(i > disks - 3); /* P or Q can't be zero */ + srcs[i] = blocks[i]; + } else + srcs[i] = page_address(blocks[i]) + offset; + } + raid6_call.gen_syndrome(disks, len, srcs); + async_tx_sync_epilog(submit); +} + +/** + * async_gen_syndrome - asynchronously calculate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @submit: submission/completion modifiers + * + * General note: This routine assumes a field of GF(2^8) with a + * primitive polynomial of 0x11d and a generator of {02}. + * + * 'disks' note: callers can optionally omit either P or Q (but not + * both) from the calculation by setting blocks[disks-2] or + * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= + * PAGE_SIZE as a temporary buffer of this size is used in the + * synchronous path. 'disks' always accounts for both destination + * buffers. + * + * 'blocks' note: if submit->scribble is NULL then the contents of + * 'blocks' may be overridden + */ +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + int src_cnt = disks - 2; + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &P(blocks, disks), 2, + blocks, src_cnt, len); + struct dma_device *device = chan ? 
chan->device : NULL; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && + (src_cnt <= dma_maxpq(device, 0) || + dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) { + /* run the p+q asynchronously */ + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, + disks, len, dma_src, submit); + } + + /* run the pq synchronously */ + pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + if (!P(blocks, disks)) { + P(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + if (!Q(blocks, disks)) { + Q(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + do_sync_gen_syndrome(blocks, offset, disks, len, submit); + + return NULL; +} +EXPORT_SYMBOL_GPL(async_gen_syndrome); + +/** + * async_syndrome_val - asynchronously validate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set + * @spare: temporary result buffer for the synchronous case + * @submit: submission / completion modifiers + * + * The same notes from async_gen_syndrome apply to the 'blocks', + * and 'disks' parameters of this routine. The synchronous path + * requires a temporary result buffer and submit->scribble to be + * specified. + */ +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int disks, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, + NULL, 0, blocks, disks, + len); + struct dma_device *device = chan ? chan->device : NULL; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = submit->cb_fn ? 
DMA_PREP_INTERRUPT : 0; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks < 4); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && disks <= dma_maxpq(device, 0)) { + struct device *dev = device->dev; + dma_addr_t *pq = &dma_src[disks-2]; + int i; + + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + if (!P(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (!Q(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + for (i = 0; i < disks; i++) + if (likely(blocks[i])) { + BUG_ON(is_raid6_zero_block(blocks[i])); + dma_src[i] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + } + + for (;;) { + tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + disks - 2, + raid6_gfexp, + len, pqres, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + async_tx_submit(chan, tx, submit); + + return tx; + } else { + struct page *p_src = P(blocks, disks); + struct page *q_src = Q(blocks, disks); + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *scribble = submit->scribble; + void *cb_param_orig = submit->cb_param; + void *p, *q, *s; + + pr_debug("%s: (sync) disks: %d len: %zu\n", + __func__, disks, len); + + /* caller must provide a temporary result buffer and + * allow the input parameters to be preserved + */ + BUG_ON(!spare || !scribble); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + /* recompute p and/or q into the temporary buffer and then + * check to see the result matches the current value + */ + tx = NULL; + *pqres = 0; + if (p_src) { + init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, scribble); + tx = async_xor(spare, blocks, offset, disks-2, len, submit); + async_tx_quiesce(&tx); + p = page_address(p_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; + } + + if (q_src) { + P(blocks, disks) = NULL; + Q(blocks, disks) = spare; + init_async_submit(submit, 0, NULL, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, offset, disks, len, submit); + async_tx_quiesce(&tx); + q = page_address(q_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; + } + + /* restore P, Q and submit */ + P(blocks, disks) = p_src; + Q(blocks, disks) = q_src; + + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + submit->flags = flags_orig; + async_tx_sync_epilog(submit); + + return NULL; + } +} +EXPORT_SYMBOL_GPL(async_syndrome_val); + +static int __init async_pq_init(void) +{ + scribble = alloc_page(GFP_KERNEL); + + if (scribble) + return 0; + + pr_err("%s: failed to allocate required spare page\n", __func__); + + return -ENOMEM; +} + +static void __exit async_pq_exit(void) +{ + put_page(scribble); +} + +module_init(async_pq_init); +module_exit(async_pq_exit); + +MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 78fb7780272a..56b5f98da463 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -62,7 +62,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, while (src_cnt) { submit->flags = flags_orig; dma_flags = 0; - xor_src_cnt = min(src_cnt, dma->max_xor); + xor_src_cnt = 
min(src_cnt, (int)dma->max_xor); /* if we are submitting additional xors, leave the chain open, * clear the callback parameters, and leave the destination * buffer mapped diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index e002e0e0d055..cd5673d3043b 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -646,6 +646,10 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_xor); BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 6ff79a672699..4496bc606662 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1257,7 +1257,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " "( %s%s%s%s%s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 12a2efcbd565..e6ce5f004f98 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 02447afcebad..ce010cd991d2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -52,7 +52,7 @@ enum dma_status { enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, + DMA_PQ, DMA_DUAL_XOR, DMA_PQ_UPDATE, DMA_XOR_VAL, @@ -70,20 +70,28 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. 
has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_PREP_PQ_DISABLE_P = (1 << 4), + DMA_PREP_PQ_DISABLE_Q = (1 << 5), + DMA_PREP_CONTINUE = (1 << 6), }; /** @@ -226,6 +234,7 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -234,6 +243,8 @@ struct dma_async_tx_descriptor { * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -248,7 +259,9 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -265,6 +278,14 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -283,6 +304,60 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & 
DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From 0a82a6239beecc95db6e05fe43ee62d16b381d38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: add support for asynchronous RAID6 recovery operations async_raid6_2data_recov() recovers two data disk failures async_raid6_datap_recov() recovers a data disk and the P disk These routines are a port of the synchronous versions found in drivers/md/raid6recov.c. The primary difference is breaking out the xor operations into separate calls to async_xor. Two helper routines are introduced to perform scalar multiplication where needed. async_sum_product() multiplies two sources by scalar coefficients and then sums (xor) the result. async_mult() simply multiplies a single source by a scalar. This implemention also includes, in contrast to the original synchronous-only code, special case handling for the 4-disk and 5-disk array cases. In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. [ Impact: asynchronous raid6 recovery routines for 2data and datap cases ] Cc: Yuri Tikhonov Cc: Ilya Yanok Cc: H. 
Peter Anvin Cc: David Woodhouse Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 4 + crypto/async_tx/Kconfig | 5 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_raid6_recov.c | 448 ++++++++++++++++++++++++++++++++++ include/linux/async_tx.h | 8 + 5 files changed, 466 insertions(+) create mode 100644 crypto/async_tx/async_raid6_recov.c (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 0e48e054d69a..ba046b8fa92f 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -67,6 +67,10 @@ xor_val - xor a series of source buffers and set a flag if the pq - generate the p+q (raid6 syndrome) from a series of source buffers pq_val - validate that a p and or q buffer are in sync with a given series of sources +datap - (raid6_datap_recov) recover a raid6 data block and the p block + from the given sources +2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index cb6d7314f198..e5aeb2b79e6f 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -18,3 +18,8 @@ config ASYNC_PQ tristate select ASYNC_CORE +config ASYNC_RAID6_RECOV + tristate + select ASYNC_CORE + select ASYNC_PQ + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 1b9926588259..9a1a76811b80 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQ) += async_pq.o +obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c new file mode 100644 index 000000000000..0c14d48c9896 --- /dev/null +++ b/crypto/async_tx/async_raid6_recov.c @@ -0,0 +1,448 @@ +/* + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. + * Copyright(c) 2009 Intel Corporation + * + * based on raid6recov.c: + * Copyright 2002 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include +#include +#include +#include + +static struct dma_async_tx_descriptor * +async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, + size_t len, struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, srcs, 2, len); + struct dma_device *dma = chan ? 
chan->device : NULL; + const u8 *amul, *bmul; + u8 ax, bx; + u8 *a, *b, *c; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[2]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* run the operation synchronously */ + async_tx_quiesce(&submit->depend_tx); + amul = raid6_gfmul[coef[0]]; + bmul = raid6_gfmul[coef[1]]; + a = page_address(srcs[0]); + b = page_address(srcs[1]); + c = page_address(dest); + + while (len--) { + ax = amul[*a++]; + bx = bmul[*b++]; + *c++ = ax ^ bx; + } + + return NULL; +} + +static struct dma_async_tx_descriptor * +async_mult(struct page *dest, struct page *src, u8 coef, size_t len, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, &src, 1, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *qmul; /* Q multiplier table */ + u8 *d, *s; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[1]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* no channel available, or failed to allocate a descriptor, so + * perform the operation synchronously + */ + async_tx_quiesce(&submit->depend_tx); + qmul = raid6_gfmul[coef]; + d = page_address(dest); + s = page_address(src); + + while (len--) + *d++ = qmul[*s++]; + + return NULL; +} + +static struct dma_async_tx_descriptor * +__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *a, *b; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[4-2]; + q = blocks[4-1]; + + a = blocks[faila]; + b = blocks[failb]; + + /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = p; + srcs[1] = q; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(b, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = p; + srcs[1] = b; + init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(a, srcs, 0, 2, bytes, submit); + + return tx; + +} + +static struct dma_async_tx_descriptor * +__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *g, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void 
*cb_param = submit->cb_param; + void *scribble = submit->scribble; + int uninitialized_var(good); + int i; + + for (i = 0; i < 3; i++) { + if (i == faila || i == failb) + continue; + else { + good = i; + break; + } + } + BUG_ON(i >= 3); + + p = blocks[5-2]; + q = blocks[5-1]; + g = blocks[good]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for delta p and + * delta q + */ + dp = blocks[faila]; + dq = blocks[failb]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(dp, g, 0, 0, bytes, submit); + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +static struct dma_async_tx_descriptor * +__2data_recov_n(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-2] = dp; + dq = blocks[failb]; + blocks[failb] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + + /* Restore pointer table */ + blocks[faila] = dp; + blocks[failb] = dq; + blocks[disks-2] = p; + blocks[disks-1] = q; + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +/** + * 
async_raid6_2data_recov - asynchronously calculate two missing data blocks + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: first failed drive index + * @failb: second failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!submit->scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + switch (disks) { + case 4: + /* dma devices do not uniformly understand a zero source pq + * operation (in contrast to the synchronous case), so + * explicitly handle the 4 disk special case + */ + return __2data_recov_4(bytes, faila, failb, blocks, submit); + case 5: + /* dma devices do not uniformly understand a single + * source pq operation (in contrast to the synchronous + * case), so explicitly handle the 5 disk special case + */ + return __2data_recov_5(bytes, faila, failb, blocks, submit); + default: + return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); + } +} +EXPORT_SYMBOL_GPL(async_raid6_2data_recov); + +/** + * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int disks, size_t bytes, int faila, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dq; + u8 coef; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + struct page *srcs[2]; + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_datap_recov(disks, bytes, faila, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + /* in the 4 disk case we only need to perform a single source + * multiplication + */ + if (disks == 4) { + int good = faila == 0 ? 
1 : 0; + struct page *g = blocks[good]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(p, g, 0, 0, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + } else { + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + } + + /* Restore pointer table */ + blocks[faila] = dq; + blocks[disks-1] = q; + + /* calculate g^{-faila} */ + coef = raid6_gfinv[raid6_gfexp[faila]]; + + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, dq, coef, bytes, submit); + + srcs[0] = p; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(p, srcs, 0, 2, bytes, submit); + + return tx; +} +EXPORT_SYMBOL_GPL(async_raid6_datap_recov); + +MODULE_AUTHOR("Dan Williams "); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index e6ce5f004f98..866e61c4e2e0 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -194,5 +194,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From e500011ffa191d662ac64d4ada6a5187b3180e16 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Aug 2009 13:19:12 -0700 Subject: timers: Drop a function prototype Drop prototype for non-existent next_timer_interrupt() function. Signed-off-by: Randy Dunlap Cc: akpm LKML-Reference: <4A9ADEC0.70306@oracle.com> Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index be62ec2ebea5..a2d1eb6cb3f0 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -173,11 +173,6 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) -/* - * Return when the next timer-wheel timeout occurs (in absolute jiffies), - * locks the timer base: - */ -extern unsigned long next_timer_interrupt(void); /* * Return when the next timer-wheel timeout occurs (in absolute jiffies), * locks the timer base and does the comparison against the given -- cgit v1.2.3 From f380ef86916904e4b79f7bec599deb51057b2d0c Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Wed, 19 Aug 2009 00:56:44 +0200 Subject: drm/crtc_helper: place drm_helper_encoder_in_use() in the header file - The symbol was already exported. 
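A minimal sketch of what this enables (hypothetical driver code, not part of this patch; assumes "drmP.h" and "drm_crtc_helper.h" are included): with the prototype visible, a driver can test encoder usage directly, for example to power down encoders that no CRTC currently drives.

	/* Hypothetical helper: turn off any encoder without an active CRTC.
	 * This mirrors the kind of check drm_helper_disable_unused_functions()
	 * already performs internally.
	 */
	static void example_disable_unused_encoders(struct drm_device *dev)
	{
		struct drm_encoder *encoder;

		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
			struct drm_encoder_helper_funcs *funcs = encoder->helper_private;

			if (!drm_helper_encoder_in_use(encoder) && funcs && funcs->dpms)
				funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
		}
	}
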
Signed-off-by: Maarten Maathuis Signed-off-by: Dave Airlie --- include/drm/drm_crtc_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 6769ff6c1bc0..e44a4f87303c 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -98,6 +98,7 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb); extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc); +extern bool drm_helper_encoder_in_use(struct drm_encoder *encoder); extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode); -- cgit v1.2.3 From 785b93ef8c309730c2de84ce9c229e40e2d01480 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 28 Aug 2009 15:46:53 +1000 Subject: drm/kms: move driver specific fb common code to helper functions (v2) Initially I always meant this code to be shared, but things ran away from me before I got to it. This refactors the i915 and radeon kms fbdev interaction layers out into generic helpers + driver specific pieces. It moves all the panic/sysrq enhancements to the core file, and stores a linked list of kernel fbs. This could possibly be improved to only store the fb which has fbcon on it for panics etc. radeon retains some specific codes used for a big endian workaround. changes: fix oops in v1 fix freeing path for crtc_info Reviewed-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 3 +- drivers/gpu/drm/drm_fb_helper.c | 697 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_dma.c | 3 +- drivers/gpu/drm/i915/intel_display.c | 12 - drivers/gpu/drm/i915/intel_drv.h | 3 - drivers/gpu/drm/i915/intel_fb.c | 737 ++------------------------------ drivers/gpu/drm/radeon/radeon_display.c | 5 +- drivers/gpu/drm/radeon/radeon_fb.c | 670 ++++------------------------- drivers/gpu/drm/radeon/radeon_mode.h | 2 - include/drm/drm_crtc.h | 2 + include/drm/drm_fb_helper.h | 82 ++++ 11 files changed, 907 insertions(+), 1309 deletions(-) create mode 100644 drivers/gpu/drm/drm_fb_helper.c create mode 100644 include/drm/drm_fb_helper.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 5f0aec4f082a..99071684de25 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -11,7 +11,8 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \ drm_crtc.o drm_crtc_helper.o drm_modes.o drm_edid.o \ - drm_info.o drm_debugfs.o drm_encoder_slave.o + drm_info.o drm_debugfs.o drm_encoder_slave.o \ + drm_fb_helper.o drm-$(CONFIG_COMPAT) += drm_ioc32.o diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c new file mode 100644 index 000000000000..d6ffea74a502 --- /dev/null +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -0,0 +1,697 @@ +/* + * Copyright (c) 2006-2009 Red Hat Inc. 
+ * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM framebuffer helper functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + * + * Authors: + * Dave Airlie + * Jesse Barnes + */ +#include <linux/sysrq.h> +#include <linux/fb.h> +#include "drmP.h" +#include "drm_crtc.h" +#include "drm_fb_helper.h" +#include "drm_crtc_helper.h" + +static LIST_HEAD(kernel_fb_helper_list); + +bool drm_fb_helper_force_kernel_mode(void) +{ + int i = 0; + bool ret, error = false; + struct drm_fb_helper *helper; + + if (list_empty(&kernel_fb_helper_list)) + return false; + + list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) { + for (i = 0; i < helper->crtc_count; i++) { + struct drm_mode_set *mode_set = &helper->crtc_info[i].mode_set; + ret = drm_crtc_helper_set_config(mode_set); + if (ret) + error = true; + } + } + return error; +} + +int drm_fb_helper_panic(struct notifier_block *n, unsigned long ununsed, + void *panic_str) +{ + DRM_ERROR("panic occurred, switching back to text console\n"); + return drm_fb_helper_force_kernel_mode(); + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_panic); + +static struct notifier_block paniced = { + .notifier_call = drm_fb_helper_panic, +}; + +/** + * drm_fb_helper_restore - restore the framebuffer console (kernel) config + * + * Restores the kernel's fbcon mode, used for lastclose & panic paths. + */ +void drm_fb_helper_restore(void) +{ + bool ret; + ret = drm_fb_helper_force_kernel_mode(); + if (ret == true) + DRM_ERROR("Failed to restore crtc configuration\n"); +} +EXPORT_SYMBOL(drm_fb_helper_restore); + +static void drm_fb_helper_restore_work_fn(struct work_struct *ignored) +{ + drm_fb_helper_restore(); +} +static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn); + +static void drm_fb_helper_sysrq(int dummy1, struct tty_struct *dummy3) +{ + schedule_work(&drm_fb_helper_restore_work); +} + +static struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { + .handler = drm_fb_helper_sysrq, + .help_msg = "force-fb(V)", + .action_msg = "Restore framebuffer console", +}; + +static void drm_fb_helper_on(struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + int i; + + /* + * For each CRTC in this fb, turn the crtc on then, + * find all associated encoders and turn them on.
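+ * The crtc is enabled before its encoders so the pipe is already + * active when the outputs come back; drm_fb_helper_off() below does + * the opposite, turning encoders off before the crtc.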
+ */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + + mutex_lock(&dev->mode_config.mutex); + crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); + mutex_unlock(&dev->mode_config.mutex); + + /* Found a CRTC on this fb, now find encoders */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->crtc == crtc) { + struct drm_encoder_helper_funcs *encoder_funcs; + + encoder_funcs = encoder->helper_private; + mutex_lock(&dev->mode_config.mutex); + encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); + mutex_unlock(&dev->mode_config.mutex); + } + } + } +} + +static void drm_fb_helper_off(struct fb_info *info, int dpms_mode) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + int i; + + /* + * For each CRTC in this fb, find all associated encoders + * and turn them off, then turn off the CRTC. + */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + + /* Found a CRTC on this fb, now find encoders */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->crtc == crtc) { + struct drm_encoder_helper_funcs *encoder_funcs; + + encoder_funcs = encoder->helper_private; + mutex_lock(&dev->mode_config.mutex); + encoder_funcs->dpms(encoder, dpms_mode); + mutex_unlock(&dev->mode_config.mutex); + } + } + if (dpms_mode == DRM_MODE_DPMS_OFF) { + mutex_lock(&dev->mode_config.mutex); + crtc_funcs->dpms(crtc, dpms_mode); + mutex_unlock(&dev->mode_config.mutex); + } + } +} + +int drm_fb_helper_blank(int blank, struct fb_info *info) +{ + switch (blank) { + case FB_BLANK_UNBLANK: + drm_fb_helper_on(info); + break; + case FB_BLANK_NORMAL: + drm_fb_helper_off(info, DRM_MODE_DPMS_STANDBY); + break; + case FB_BLANK_HSYNC_SUSPEND: + drm_fb_helper_off(info, DRM_MODE_DPMS_STANDBY); + break; + case FB_BLANK_VSYNC_SUSPEND: + drm_fb_helper_off(info, DRM_MODE_DPMS_SUSPEND); + break; + case FB_BLANK_POWERDOWN: + drm_fb_helper_off(info, DRM_MODE_DPMS_OFF); + break; + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_blank); + +static void drm_fb_helper_crtc_free(struct drm_fb_helper *helper) +{ + int i; + + for (i = 0; i < helper->crtc_count; i++) + kfree(helper->crtc_info[i].mode_set.connectors); + kfree(helper->crtc_info); +} + +int drm_fb_helper_init_crtc_count(struct drm_fb_helper *helper, int crtc_count, int max_conn_count) +{ + struct drm_device *dev = helper->dev; + struct drm_crtc *crtc; + int ret = 0; + int i; + + helper->crtc_info = kcalloc(crtc_count, sizeof(struct drm_fb_helper_crtc), GFP_KERNEL); + if (!helper->crtc_info) + return -ENOMEM; + + helper->crtc_count = crtc_count; + + for (i = 0; i < crtc_count; i++) { + helper->crtc_info[i].mode_set.connectors = + kcalloc(max_conn_count, + sizeof(struct drm_connector *), + GFP_KERNEL); + + if (!helper->crtc_info[i].mode_set.connectors) { + ret = -ENOMEM; + goto out_free; + } + helper->crtc_info[i].mode_set.num_connectors = 0; + } + + i = 0; + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + helper->crtc_info[i].crtc_id = crtc->base.id; + helper->crtc_info[i].mode_set.crtc = crtc; + i++; + } + helper->conn_limit = 
max_conn_count; + return 0; +out_free: + drm_fb_helper_crtc_free(helper); + return -ENOMEM; +} +EXPORT_SYMBOL(drm_fb_helper_init_crtc_count); + +int drm_fb_helper_setcolreg(unsigned regno, + unsigned red, + unsigned green, + unsigned blue, + unsigned transp, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + int i; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_framebuffer *fb = fb_helper->fb; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + if (i == fb_helper->crtc_count) + continue; + + if (regno > 255) + return 1; + + if (fb->depth == 8) { + fb_helper->funcs->gamma_set(crtc, red, green, blue, regno); + return 0; + } + + if (regno < 16) { + switch (fb->depth) { + case 15: + fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | + ((green & 0xf800) >> 6) | + ((blue & 0xf800) >> 11); + break; + case 16: + fb->pseudo_palette[regno] = (red & 0xf800) | + ((green & 0xfc00) >> 5) | + ((blue & 0xf800) >> 11); + break; + case 24: + case 32: + fb->pseudo_palette[regno] = + (((red >> 8) & 0xff) << info->var.red.offset) | + (((green >> 8) & 0xff) << info->var.green.offset) | + (((blue >> 8) & 0xff) << info->var.blue.offset); + break; + } + } + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_setcolreg); + +int drm_fb_helper_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_framebuffer *fb = fb_helper->fb; + int depth; + + if (var->pixclock == -1 || !var->pixclock) + return -EINVAL; + + /* Need to resize the fb object !!! */ + if (var->xres > fb->width || var->yres > fb->height) { + DRM_ERROR("Requested width/height is greater than current fb " + "object %dx%d > %dx%d\n", var->xres, var->yres, + fb->width, fb->height); + DRM_ERROR("Need resizing code.\n"); + return -EINVAL; + } + + switch (var->bits_per_pixel) { + case 16: + depth = (var->green.length == 6) ? 16 : 15; + break; + case 32: + depth = (var->transp.length > 0) ? 
32 : 24; + break; + default: + depth = var->bits_per_pixel; + break; + } + + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 15: + var->red.offset = 10; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->transp.length = 1; + var->transp.offset = 15; + break; + case 16: + var->red.offset = 11; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 32: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 8; + var->transp.offset = 24; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_check_var); + +/* this will let fbcon do the mode init */ +int drm_fb_helper_set_par(struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct fb_var_screeninfo *var = &info->var; + struct drm_crtc *crtc; + int ret; + int i; + + if (var->pixclock != -1) { + DRM_ERROR("PIXEL CLCOK SET\n"); + return -EINVAL; + } + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + if (i == fb_helper->crtc_count) + continue; + + if (crtc->fb == fb_helper->crtc_info[i].mode_set.fb) { + mutex_lock(&dev->mode_config.mutex); + ret = crtc->funcs->set_config(&fb_helper->crtc_info->mode_set); + mutex_unlock(&dev->mode_config.mutex); + if (ret) + return ret; + } + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_set_par); + +int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_mode_set *modeset; + struct drm_crtc *crtc; + int ret = 0; + int i; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + + if (i == fb_helper->crtc_count) + continue; + + modeset = &fb_helper->crtc_info[i].mode_set; + + modeset->x = var->xoffset; + modeset->y = var->yoffset; + + if (modeset->num_connectors) { + mutex_lock(&dev->mode_config.mutex); + ret = crtc->funcs->set_config(modeset); + mutex_unlock(&dev->mode_config.mutex); + if (!ret) { + info->var.xoffset = var->xoffset; + info->var.yoffset = var->yoffset; + } + } + } + return ret; +} +EXPORT_SYMBOL(drm_fb_helper_pan_display); + +int drm_fb_helper_single_fb_probe(struct drm_device *dev, + int (*fb_create)(struct drm_device *dev, + uint32_t fb_width, + uint32_t fb_height, + uint32_t surface_width, + uint32_t surface_height, + struct drm_framebuffer **fb_ptr)) +{ + struct drm_crtc *crtc; + struct drm_connector *connector; + unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; + unsigned int surface_width = 0, surface_height = 0; + int new_fb = 0; + int crtc_count = 0; + int ret, i, 
conn_count = 0; + struct fb_info *info; + struct drm_framebuffer *fb; + struct drm_mode_set *modeset = NULL; + struct drm_fb_helper *fb_helper; + + /* first up get a count of crtcs now in use and new min/maxes width/heights */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + if (drm_helper_crtc_in_use(crtc)) { + if (crtc->desired_mode) { + if (crtc->desired_mode->hdisplay < fb_width) + fb_width = crtc->desired_mode->hdisplay; + + if (crtc->desired_mode->vdisplay < fb_height) + fb_height = crtc->desired_mode->vdisplay; + + if (crtc->desired_mode->hdisplay > surface_width) + surface_width = crtc->desired_mode->hdisplay; + + if (crtc->desired_mode->vdisplay > surface_height) + surface_height = crtc->desired_mode->vdisplay; + } + crtc_count++; + } + } + + if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { + /* hmm everyone went away - assume VGA cable just fell out + and will come back later. */ + return 0; + } + + /* do we have an fb already? */ + if (list_empty(&dev->mode_config.fb_kernel_list)) { + ret = (*fb_create)(dev, fb_width, fb_height, surface_width, + surface_height, &fb); + if (ret) + return -EINVAL; + new_fb = 1; + } else { + fb = list_first_entry(&dev->mode_config.fb_kernel_list, + struct drm_framebuffer, filp_head); + + /* if someone hotplugs something bigger than we have already allocated, we are pwned. + As really we can't resize an fbdev that is in the wild currently due to fbdev + not really being designed for the lower layers moving stuff around under it. + - so in the grand style of things - punt. */ + if ((fb->width < surface_width) || + (fb->height < surface_height)) { + DRM_ERROR("Framebuffer not large enough to scale console onto.\n"); + return -EINVAL; + } + } + + info = fb->fbdev; + fb_helper = info->par; + + crtc_count = 0; + /* okay we need to setup new connector sets in the crtcs */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + modeset = &fb_helper->crtc_info[crtc_count].mode_set; + modeset->fb = fb; + conn_count = 0; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (connector->encoder) + if (connector->encoder->crtc == modeset->crtc) { + modeset->connectors[conn_count] = connector; + conn_count++; + if (conn_count > fb_helper->conn_limit) + BUG(); + } + } + + for (i = conn_count; i < fb_helper->conn_limit; i++) + modeset->connectors[i] = NULL; + + modeset->crtc = crtc; + crtc_count++; + + modeset->num_connectors = conn_count; + if (modeset->crtc->desired_mode) { + if (modeset->mode) + drm_mode_destroy(dev, modeset->mode); + modeset->mode = drm_mode_duplicate(dev, + modeset->crtc->desired_mode); + } + } + fb_helper->crtc_count = crtc_count; + fb_helper->fb = fb; + + if (new_fb) { + info->var.pixclock = -1; + if (register_framebuffer(info) < 0) + return -EINVAL; + } else { + drm_fb_helper_set_par(info); + } + printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node, + info->fix.id); + + /* Switch back to kernel console on panic */ + /* multi card linked list maybe */ + if (list_empty(&kernel_fb_helper_list)) { + printk(KERN_INFO "registered panic notifier\n"); + atomic_notifier_chain_register(&panic_notifier_list, + &paniced); + register_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); + } + list_add(&fb_helper->kernel_fb_list, &kernel_fb_helper_list); + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_single_fb_probe); + +void drm_fb_helper_free(struct drm_fb_helper *helper) +{ + list_del(&helper->kernel_fb_list); + if (list_empty(&kernel_fb_helper_list)) { + printk(KERN_INFO 
"unregistered panic notifier\n"); + atomic_notifier_chain_unregister(&panic_notifier_list, + &paniced); + unregister_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); + } + drm_fb_helper_crtc_free(helper); +} +EXPORT_SYMBOL(drm_fb_helper_free); + +void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch) +{ + info->fix.type = FB_TYPE_PACKED_PIXELS; + info->fix.visual = FB_VISUAL_TRUECOLOR; + info->fix.type_aux = 0; + info->fix.xpanstep = 1; /* doing it in hw */ + info->fix.ypanstep = 1; /* doing it in hw */ + info->fix.ywrapstep = 0; + info->fix.accel = FB_ACCEL_I830; + info->fix.type_aux = 0; + + info->fix.line_length = pitch; + return; +} +EXPORT_SYMBOL(drm_fb_helper_fill_fix); + +void drm_fb_helper_fill_var(struct fb_info *info, struct drm_framebuffer *fb, + uint32_t fb_width, uint32_t fb_height) +{ + info->pseudo_palette = fb->pseudo_palette; + info->var.xres_virtual = fb->width; + info->var.yres_virtual = fb->height; + info->var.bits_per_pixel = fb->bits_per_pixel; + info->var.xoffset = 0; + info->var.yoffset = 0; + info->var.activate = FB_ACTIVATE_NOW; + info->var.height = -1; + info->var.width = -1; + + switch (fb->depth) { + case 8: + info->var.red.offset = 0; + info->var.green.offset = 0; + info->var.blue.offset = 0; + info->var.red.length = 8; /* 8bit DAC */ + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 0; + info->var.transp.length = 0; + break; + case 15: + info->var.red.offset = 10; + info->var.green.offset = 5; + info->var.blue.offset = 0; + info->var.red.length = 5; + info->var.green.length = 5; + info->var.blue.length = 5; + info->var.transp.offset = 15; + info->var.transp.length = 1; + break; + case 16: + info->var.red.offset = 11; + info->var.green.offset = 5; + info->var.blue.offset = 0; + info->var.red.length = 5; + info->var.green.length = 6; + info->var.blue.length = 5; + info->var.transp.offset = 0; + break; + case 24: + info->var.red.offset = 16; + info->var.green.offset = 8; + info->var.blue.offset = 0; + info->var.red.length = 8; + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 0; + info->var.transp.length = 0; + break; + case 32: + info->var.red.offset = 16; + info->var.green.offset = 8; + info->var.blue.offset = 0; + info->var.red.length = 8; + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 24; + info->var.transp.length = 8; + break; + default: + break; + } + + info->var.xres = fb_width; + info->var.yres = fb_height; +} +EXPORT_SYMBOL(drm_fb_helper_fill_var); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 544d889b9b16..c628c3671394 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -29,6 +29,7 @@ #include "drmP.h" #include "drm.h" #include "drm_crtc_helper.h" +#include "drm_fb_helper.h" #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" @@ -1347,7 +1348,7 @@ void i915_driver_lastclose(struct drm_device * dev) drm_i915_private_t *dev_priv = dev->dev_private; if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) { - intelfb_restore(); + drm_fb_helper_restore(); return; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d6fce2133413..5fb7a4f4a427 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3060,8 +3060,6 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - if (intel_crtc->mode_set.mode) - 
drm_mode_destroy(crtc->dev, intel_crtc->mode_set.mode); drm_crtc_cleanup(crtc); kfree(intel_crtc); } @@ -3107,16 +3105,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->cursor_addr = 0; intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); - - intel_crtc->mode_set.crtc = &intel_crtc->base; - intel_crtc->mode_set.connectors = (struct drm_connector **)(intel_crtc + 1); - intel_crtc->mode_set.num_connectors = 0; - - if (i915_fbpercrtc) { - - - - } } int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d6f92ea1b553..38910f8f30ed 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -96,9 +96,6 @@ struct intel_crtc { uint32_t cursor_addr; u8 lut_r[256], lut_g[256], lut_b[256]; int dpms_mode; - struct intel_framebuffer *fbdev_fb; - /* a mode_set for fbdev users on this crtc */ - struct drm_mode_set mode_set; }; #define to_intel_crtc(x) container_of(x, struct intel_crtc, base) diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 1d30802e773e..3041530c3673 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -39,339 +39,34 @@ #include "drmP.h" #include "drm.h" #include "drm_crtc.h" +#include "drm_fb_helper.h" #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" struct intelfb_par { - struct drm_device *dev; - struct drm_display_mode *our_mode; + struct drm_fb_helper helper; struct intel_framebuffer *intel_fb; - int crtc_count; - /* crtc currently bound to this */ - uint32_t crtc_ids[2]; + struct drm_display_mode *our_mode; }; -static int intelfb_setcolreg(unsigned regno, unsigned red, unsigned green, - unsigned blue, unsigned transp, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_mode_set *modeset = &intel_crtc->mode_set; - struct drm_framebuffer *fb = modeset->fb; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - - if (regno > 255) - return 1; - - if (fb->depth == 8) { - intel_crtc_fb_gamma_set(crtc, red, green, blue, regno); - return 0; - } - - if (regno < 16) { - switch (fb->depth) { - case 15: - fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | - ((green & 0xf800) >> 6) | - ((blue & 0xf800) >> 11); - break; - case 16: - fb->pseudo_palette[regno] = (red & 0xf800) | - ((green & 0xfc00) >> 5) | - ((blue & 0xf800) >> 11); - break; - case 24: - case 32: - fb->pseudo_palette[regno] = ((red & 0xff00) << 8) | - (green & 0xff00) | - ((blue & 0xff00) >> 8); - break; - } - } - } - return 0; -} - -static int intelfb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct intel_framebuffer *intel_fb = par->intel_fb; - struct drm_framebuffer *fb = &intel_fb->base; - int depth; - - if (var->pixclock == -1 || !var->pixclock) - return -EINVAL; - - /* Need to resize the fb object !!! 
*/ - if (var->xres > fb->width || var->yres > fb->height) { - DRM_ERROR("Requested width/height is greater than current fb object %dx%d > %dx%d\n",var->xres,var->yres,fb->width,fb->height); - DRM_ERROR("Need resizing code.\n"); - return -EINVAL; - } - - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; - default: - return -EINVAL; - } - - return 0; -} - -/* this will let fbcon do the mode init */ -/* FIXME: take mode config lock? */ -static int intelfb_set_par(struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct fb_var_screeninfo *var = &info->var; - int i; - - DRM_DEBUG("%d %d\n", var->xres, var->pixclock); - - if (var->pixclock != -1) { - - DRM_ERROR("PIXEL CLOCK SET\n"); - return -EINVAL; - } else { - struct drm_crtc *crtc; - int ret; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - if (crtc->fb == intel_crtc->mode_set.fb) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(&intel_crtc->mode_set); - mutex_unlock(&dev->mode_config.mutex); - if (ret) - return ret; - } - } - return 0; - } -} - -static int intelfb_pan_display(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_mode_set *modeset; - struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; - int ret = 0; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - intel_crtc = to_intel_crtc(crtc); - modeset = &intel_crtc->mode_set; - - modeset->x = var->xoffset; - modeset->y = var->yoffset; - - if (modeset->num_connectors) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(modeset); - mutex_unlock(&dev->mode_config.mutex); - if (!ret) { - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - } - } - } - - return ret; -} - -static void intelfb_on(struct fb_info *info) -{ - struct intelfb_par *par = info->par; - 
struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - encoder_funcs = encoder->helper_private; - encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); - } - } - } -} - -static void intelfb_off(struct fb_info *info, int dpms_mode) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - encoder_funcs = encoder->helper_private; - encoder_funcs->dpms(encoder, dpms_mode); - } - } - if (dpms_mode == DRM_MODE_DPMS_OFF) - crtc_funcs->dpms(crtc, dpms_mode); - } -} - -static int intelfb_blank(int blank, struct fb_info *info) -{ - switch (blank) { - case FB_BLANK_UNBLANK: - intelfb_on(info); - break; - case FB_BLANK_NORMAL: - intelfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_HSYNC_SUSPEND: - intelfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_VSYNC_SUSPEND: - intelfb_off(info, DRM_MODE_DPMS_SUSPEND); - break; - case FB_BLANK_POWERDOWN: - intelfb_off(info, DRM_MODE_DPMS_OFF); - break; - } - return 0; -} - static struct fb_ops intelfb_ops = { .owner = THIS_MODULE, - .fb_check_var = intelfb_check_var, - .fb_set_par = intelfb_set_par, - .fb_setcolreg = intelfb_setcolreg, + .fb_check_var = drm_fb_helper_check_var, + .fb_set_par = drm_fb_helper_set_par, + .fb_setcolreg = drm_fb_helper_setcolreg, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, - .fb_pan_display = intelfb_pan_display, - .fb_blank = intelfb_blank, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, }; +static struct drm_fb_helper_funcs intel_fb_helper_funcs = { + .gamma_set = intel_crtc_fb_gamma_set, +}; + + /** * Curretly it is assumed that the old framebuffer is reused. 
* @@ -412,25 +107,10 @@ int intelfb_resize(struct drm_device *dev, struct drm_crtc *crtc) } EXPORT_SYMBOL(intelfb_resize); -static struct drm_mode_set kernelfb_mode; - -static int intelfb_panic(struct notifier_block *n, unsigned long ununsed, - void *panic_str) -{ - DRM_ERROR("panic occurred, switching back to text console\n"); - - intelfb_restore(); - return 0; -} - -static struct notifier_block paniced = { - .notifier_call = intelfb_panic, -}; - static int intelfb_create(struct drm_device *dev, uint32_t fb_width, uint32_t fb_height, uint32_t surface_width, uint32_t surface_height, - struct intel_framebuffer **intel_fb_p) + struct drm_framebuffer **fb_p) { struct fb_info *info; struct intelfb_par *par; @@ -479,7 +159,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, list_add(&fb->filp_head, &dev->mode_config.fb_kernel_list); intel_fb = to_intel_framebuffer(fb); - *intel_fb_p = intel_fb; + *fb_p = fb; info = framebuffer_alloc(sizeof(struct intelfb_par), device); if (!info) { @@ -489,21 +169,19 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, par = info->par; + par->helper.funcs = &intel_fb_helper_funcs; + par->helper.dev = dev; + ret = drm_fb_helper_init_crtc_count(&par->helper, 2, + INTELFB_CONN_LIMIT); + if (ret) + goto out_unref; + strcpy(info->fix.id, "inteldrmfb"); - info->fix.type = FB_TYPE_PACKED_PIXELS; - info->fix.visual = FB_VISUAL_TRUECOLOR; - info->fix.type_aux = 0; - info->fix.xpanstep = 1; /* doing it in hw */ - info->fix.ypanstep = 1; /* doing it in hw */ - info->fix.ywrapstep = 0; - info->fix.accel = FB_ACCEL_I830; - info->fix.type_aux = 0; info->flags = FBINFO_DEFAULT; info->fbops = &intelfb_ops; - info->fix.line_length = fb->pitch; /* setup aperture base/size for vesafb takeover */ info->aperture_base = dev->mode_config.fb_base; @@ -527,18 +205,8 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, // memset(info->screen_base, 0, size); - info->pseudo_palette = fb->pseudo_palette; - info->var.xres_virtual = fb->width; - info->var.yres_virtual = fb->height; - info->var.bits_per_pixel = fb->bits_per_pixel; - info->var.xoffset = 0; - info->var.yoffset = 0; - info->var.activate = FB_ACTIVATE_NOW; - info->var.height = -1; - info->var.width = -1; - - info->var.xres = fb_width; - info->var.yres = fb_height; + drm_fb_helper_fill_fix(info, fb->depth); + drm_fb_helper_fill_var(info, fb, fb_width, fb_height); /* FIXME: we really shouldn't expose mmio space at all */ info->fix.mmio_start = pci_resource_start(dev->pdev, mmio_bar); @@ -550,64 +218,9 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, info->pixmap.flags = FB_PIXMAP_SYSTEM; info->pixmap.scan_align = 1; - switch(fb->depth) { - case 8: - info->var.red.offset = 0; - info->var.green.offset = 0; - info->var.blue.offset = 0; - info->var.red.length = 8; /* 8bit DAC */ - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - 
info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; - default: - break; - } - fb->fbdev = info; par->intel_fb = intel_fb; - par->dev = dev; /* To allow resizeing without swapping buffers */ DRM_DEBUG("allocated %dx%d fb: 0x%08x, bo %p\n", intel_fb->base.width, @@ -625,307 +238,12 @@ out: return ret; } -static int intelfb_multi_fb_probe_crtc(struct drm_device *dev, struct drm_crtc *crtc) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_framebuffer *intel_fb; - struct drm_framebuffer *fb; - struct drm_connector *connector; - struct fb_info *info; - struct intelfb_par *par; - struct drm_mode_set *modeset; - unsigned int width, height; - int new_fb = 0; - int ret, i, conn_count; - - if (!drm_helper_crtc_in_use(crtc)) - return 0; - - if (!crtc->desired_mode) - return 0; - - width = crtc->desired_mode->hdisplay; - height = crtc->desired_mode->vdisplay; - - /* is there an fb bound to this crtc already */ - if (!intel_crtc->mode_set.fb) { - ret = intelfb_create(dev, width, height, width, height, &intel_fb); - if (ret) - return -EINVAL; - new_fb = 1; - } else { - fb = intel_crtc->mode_set.fb; - intel_fb = to_intel_framebuffer(fb); - if ((intel_fb->base.width < width) || (intel_fb->base.height < height)) - return -EINVAL; - } - - info = intel_fb->base.fbdev; - par = info->par; - - modeset = &intel_crtc->mode_set; - modeset->fb = &intel_fb->base; - conn_count = 0; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - if (connector->encoder) - if (connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count] = connector; - conn_count++; - if (conn_count > INTELFB_CONN_LIMIT) - BUG(); - } - } - - for (i = conn_count; i < INTELFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - par->crtc_ids[0] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) - drm_mode_destroy(dev, modeset->mode); - modeset->mode = drm_mode_duplicate(dev, - modeset->crtc->desired_mode); - } - - par->crtc_count = 1; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else - intelfb_set_par(info); - - DRM_INFO("fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - kernelfb_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - DRM_DEBUG("registered panic notifier\n"); - - return 0; -} - -static int intelfb_multi_fb_probe(struct drm_device *dev) -{ - - struct drm_crtc *crtc; - int ret = 0; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - ret = intelfb_multi_fb_probe_crtc(dev, crtc); - if (ret) - return ret; - } - return ret; -} - -static int intelfb_single_fb_probe(struct drm_device *dev) -{ - struct drm_crtc *crtc; - struct drm_connector *connector; - unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; - unsigned int surface_width = 0, surface_height = 0; - int new_fb = 0; - int crtc_count = 0; - int ret, i, conn_count = 0; - struct intel_framebuffer *intel_fb; - struct fb_info *info; - struct intelfb_par *par; - struct drm_mode_set *modeset = NULL; 
- - DRM_DEBUG("\n"); - - /* Get a count of crtcs now in use and new min/maxes width/heights */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (!drm_helper_crtc_in_use(crtc)) - continue; - - crtc_count++; - if (!crtc->desired_mode) - continue; - - /* Smallest mode determines console size... */ - if (crtc->desired_mode->hdisplay < fb_width) - fb_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay < fb_height) - fb_height = crtc->desired_mode->vdisplay; - - /* ... but largest for memory allocation dimensions */ - if (crtc->desired_mode->hdisplay > surface_width) - surface_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay > surface_height) - surface_height = crtc->desired_mode->vdisplay; - } - - if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { - /* hmm everyone went away - assume VGA cable just fell out - and will come back later. */ - DRM_DEBUG("no CRTCs available?\n"); - return 0; - } - -//fail - /* Find the fb for our new config */ - if (list_empty(&dev->mode_config.fb_kernel_list)) { - DRM_DEBUG("creating new fb (console size %dx%d, " - "buffer size %dx%d)\n", fb_width, fb_height, - surface_width, surface_height); - ret = intelfb_create(dev, fb_width, fb_height, surface_width, - surface_height, &intel_fb); - if (ret) - return -EINVAL; - new_fb = 1; - } else { - struct drm_framebuffer *fb; - - fb = list_first_entry(&dev->mode_config.fb_kernel_list, - struct drm_framebuffer, filp_head); - intel_fb = to_intel_framebuffer(fb); - - /* if someone hotplugs something bigger than we have already - * allocated, we are pwned. As really we can't resize an - * fbdev that is in the wild currently due to fbdev not really - * being designed for the lower layers moving stuff around - * under it. - * - so in the grand style of things - punt. - */ - if ((fb->width < surface_width) || - (fb->height < surface_height)) { - DRM_ERROR("fb not large enough for console\n"); - return -EINVAL; - } - } -// fail - - info = intel_fb->base.fbdev; - par = info->par; - - crtc_count = 0; - /* - * For each CRTC, set up the connector list for the CRTC's mode - * set configuration. 
- */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - modeset = &intel_crtc->mode_set; - modeset->fb = &intel_fb->base; - conn_count = 0; - list_for_each_entry(connector, &dev->mode_config.connector_list, - head) { - if (!connector->encoder) - continue; - - if(connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count++] = connector; - if (conn_count > INTELFB_CONN_LIMIT) - BUG(); - } - } - - /* Zero out remaining connector pointers */ - for (i = conn_count; i < INTELFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - par->crtc_ids[crtc_count++] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) - drm_mode_destroy(dev, modeset->mode); - modeset->mode = drm_mode_duplicate(dev, - modeset->crtc->desired_mode); - } - } - par->crtc_count = crtc_count; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else - intelfb_set_par(info); - - DRM_INFO("fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - kernelfb_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - DRM_DEBUG("registered panic notifier\n"); - - return 0; -} - -/** - * intelfb_restore - restore the framebuffer console (kernel) config - * - * Restore's the kernel's fbcon mode, used for lastclose & panic paths. - */ -void intelfb_restore(void) -{ - int ret; - if ((ret = drm_crtc_helper_set_config(&kernelfb_mode)) != 0) { - DRM_ERROR("Failed to restore crtc configuration: %d\n", - ret); - } -} - -static void intelfb_restore_work_fn(struct work_struct *ignored) -{ - intelfb_restore(); -} -static DECLARE_WORK(intelfb_restore_work, intelfb_restore_work_fn); - -static void intelfb_sysrq(int dummy1, struct tty_struct *dummy3) -{ - schedule_work(&intelfb_restore_work); -} - -static struct sysrq_key_op sysrq_intelfb_restore_op = { - .handler = intelfb_sysrq, - .help_msg = "force-fb(V)", - .action_msg = "Restore framebuffer console", -}; - int intelfb_probe(struct drm_device *dev) { int ret; DRM_DEBUG("\n"); - - /* something has changed in the lower levels of hell - deal with it - here */ - - /* two modes : a) 1 fb to rule all crtcs. - b) one fb per crtc. - two actions 1) new connected device - 2) device removed. - case a/1 : if the fb surface isn't big enough - resize the surface fb. - if the fb size isn't big enough - resize fb into surface. - if everything big enough configure the new crtc/etc. - case a/2 : undo the configuration - possibly resize down the fb to fit the new configuration. - case b/1 : see if it is on a new crtc - setup a new fb and add it. - case b/2 : teardown the new fb. 
- */ - - /* mode a first */ - /* search for an fb */ - if (i915_fbpercrtc == 1) { - ret = intelfb_multi_fb_probe(dev); - } else { - ret = intelfb_single_fb_probe(dev); - } - - register_sysrq_key('v', &sysrq_intelfb_restore_op); - + ret = drm_fb_helper_single_fb_probe(dev, intelfb_create); return ret; } EXPORT_SYMBOL(intelfb_probe); @@ -940,13 +258,14 @@ int intelfb_remove(struct drm_device *dev, struct drm_framebuffer *fb) info = fb->fbdev; if (info) { + struct intelfb_par *par = info->par; unregister_framebuffer(info); iounmap(info->screen_base); + if (info->par) + drm_fb_helper_free(&par->helper); framebuffer_release(info); } - atomic_notifier_chain_unregister(&panic_notifier_list, &paniced); - memset(&kernelfb_mode, 0, sizeof(struct drm_mode_set)); return 0; } EXPORT_SYMBOL(intelfb_remove); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index a8fa1bb84cf7..af035605d147 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -158,9 +158,6 @@ static void radeon_crtc_destroy(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - if (radeon_crtc->mode_set.mode) { - drm_mode_destroy(crtc->dev, radeon_crtc->mode_set.mode); - } drm_crtc_cleanup(crtc); kfree(radeon_crtc); } @@ -189,9 +186,11 @@ static void radeon_crtc_init(struct drm_device *dev, int index) radeon_crtc->crtc_id = index; rdev->mode_info.crtcs[index] = radeon_crtc; +#if 0 radeon_crtc->mode_set.crtc = &radeon_crtc->base; radeon_crtc->mode_set.connectors = (struct drm_connector **)(radeon_crtc + 1); radeon_crtc->mode_set.num_connectors = 0; +#endif for (i = 0; i < 256; i++) { radeon_crtc->lut_r[i] = i << 2; diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index ec383edf5f38..ebb58959f418 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -28,15 +28,7 @@ */ #include -#include -#include -#include -#include -#include -#include -#include #include -#include #include "drmP.h" #include "drm.h" @@ -45,375 +37,86 @@ #include "radeon_drm.h" #include "radeon.h" +#include "drm_fb_helper.h" + struct radeon_fb_device { - struct radeon_device *rdev; - struct drm_display_mode *mode; + struct drm_fb_helper helper; struct radeon_framebuffer *rfb; - int crtc_count; - /* crtc currently bound to this */ - uint32_t crtc_ids[2]; + struct radeon_device *rdev; }; -static int radeonfb_setcolreg(unsigned regno, - unsigned red, - unsigned green, - unsigned blue, - unsigned transp, - struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct drm_mode_set *modeset = &radeon_crtc->mode_set; - struct drm_framebuffer *fb = modeset->fb; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - if (i == rfbdev->crtc_count) { - continue; - } - if (regno > 255) { - return 1; - } - if (fb->depth == 8) { - radeon_crtc_fb_gamma_set(crtc, red, green, blue, regno); - return 0; - } - - if (regno < 16) { - switch (fb->depth) { - case 15: - fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | - ((green & 0xf800) >> 6) | - ((blue & 0xf800) >> 11); - break; - case 16: - fb->pseudo_palette[regno] = (red & 0xf800) | - ((green & 0xfc00) >> 5) | - ((blue & 0xf800) >> 11); - break; - case 24: - case 32: - 
fb->pseudo_palette[regno] = - (((red >> 8) & 0xff) << info->var.red.offset) | - (((green >> 8) & 0xff) << info->var.green.offset) | - (((blue >> 8) & 0xff) << info->var.blue.offset); - break; - } - } - } - return 0; -} - -static int radeonfb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) +static int radeon_fb_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) { - struct radeon_fb_device *rfbdev = info->par; - struct radeon_framebuffer *rfb = rfbdev->rfb; - struct drm_framebuffer *fb = &rfb->base; - int depth; - - if (var->pixclock == -1 || !var->pixclock) { - return -EINVAL; - } - /* Need to resize the fb object !!! */ - if (var->xres > fb->width || var->yres > fb->height) { - DRM_ERROR("Requested width/height is greater than current fb " - "object %dx%d > %dx%d\n", var->xres, var->yres, - fb->width, fb->height); - DRM_ERROR("Need resizing code.\n"); - return -EINVAL; - } - - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; -#ifdef __LITTLE_ENDIAN - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; -#else - case 24: - var->red.offset = 8; - var->green.offset = 16; - var->blue.offset = 24; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 8; - var->green.offset = 16; - var->blue.offset = 24; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 0; - break; -#endif - default: - return -EINVAL; - } - return 0; -} - -/* this will let fbcon do the mode init */ -static int radeonfb_set_par(struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct fb_var_screeninfo *var = &info->var; - struct drm_crtc *crtc; int ret; - int i; - - if (var->pixclock != -1) { - DRM_ERROR("PIXEL CLCOK SET\n"); - return -EINVAL; - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - if (i == rfbdev->crtc_count) { - continue; - } - if (crtc->fb == radeon_crtc->mode_set.fb) { - mutex_lock(&dev->mode_config.mutex); - ret = 
crtc->funcs->set_config(&radeon_crtc->mode_set); - mutex_unlock(&dev->mode_config.mutex); - if (ret) { - return ret; - } - } - } - return 0; -} - -static int radeonfb_pan_display(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_mode_set *modeset; - struct drm_crtc *crtc; - struct radeon_crtc *radeon_crtc; - int ret = 0; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - if (i == rfbdev->crtc_count) { - continue; - } - - radeon_crtc = to_radeon_crtc(crtc); - modeset = &radeon_crtc->mode_set; - - modeset->x = var->xoffset; - modeset->y = var->yoffset; - - if (modeset->num_connectors) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(modeset); - mutex_unlock(&dev->mode_config.mutex); - if (!ret) { - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - } + ret = drm_fb_helper_check_var(var, info); + if (ret) + return ret; + + /* big endian override for radeon endian workaround */ +#ifdef __BIG_ENDIAN + { + int depth; + switch (var->bits_per_pixel) { + case 16: + depth = (var->green.length == 6) ? 16 : 15; + break; + case 32: + depth = (var->transp.length > 0) ? 32 : 24; + break; + default: + depth = var->bits_per_pixel; + break; } - } - return ret; -} - -static void radeonfb_on(struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - mutex_lock(&dev->mode_config.mutex); - crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); - mutex_unlock(&dev->mode_config.mutex); - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - - encoder_funcs = encoder->helper_private; - mutex_lock(&dev->mode_config.mutex); - encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); - mutex_unlock(&dev->mode_config.mutex); - } - } - } -} - -static void radeonfb_off(struct fb_info *info, int dpms_mode) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. 
- */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - - encoder_funcs = encoder->helper_private; - mutex_lock(&dev->mode_config.mutex); - encoder_funcs->dpms(encoder, dpms_mode); - mutex_unlock(&dev->mode_config.mutex); - } - } - if (dpms_mode == DRM_MODE_DPMS_OFF) { - mutex_lock(&dev->mode_config.mutex); - crtc_funcs->dpms(crtc, dpms_mode); - mutex_unlock(&dev->mode_config.mutex); + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 8; + var->green.offset = 16; + var->blue.offset = 24; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 32: + var->red.offset = 8; + var->green.offset = 16; + var->blue.offset = 24; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 8; + var->transp.offset = 0; + break; + default: + return -EINVAL; } } -} - -int radeonfb_blank(int blank, struct fb_info *info) -{ - switch (blank) { - case FB_BLANK_UNBLANK: - radeonfb_on(info); - break; - case FB_BLANK_NORMAL: - radeonfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_HSYNC_SUSPEND: - radeonfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_VSYNC_SUSPEND: - radeonfb_off(info, DRM_MODE_DPMS_SUSPEND); - break; - case FB_BLANK_POWERDOWN: - radeonfb_off(info, DRM_MODE_DPMS_OFF); - break; - } +#endif return 0; } static struct fb_ops radeonfb_ops = { .owner = THIS_MODULE, - .fb_check_var = radeonfb_check_var, - .fb_set_par = radeonfb_set_par, - .fb_setcolreg = radeonfb_setcolreg, + .fb_check_var = radeon_fb_check_var, + .fb_set_par = drm_fb_helper_set_par, + .fb_setcolreg = drm_fb_helper_setcolreg, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, - .fb_pan_display = radeonfb_pan_display, - .fb_blank = radeonfb_blank, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, }; /** @@ -456,21 +159,6 @@ int radeonfb_resize(struct drm_device *dev, struct drm_crtc *crtc) } EXPORT_SYMBOL(radeonfb_resize); -static struct drm_mode_set panic_mode; - -int radeonfb_panic(struct notifier_block *n, unsigned long ununsed, - void *panic_str) -{ - DRM_ERROR("panic occurred, switching back to text console\n"); - drm_crtc_helper_set_config(&panic_mode); - return 0; -} -EXPORT_SYMBOL(radeonfb_panic); - -static struct notifier_block paniced = { - .notifier_call = radeonfb_panic, -}; - static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled) { int aligned = width; @@ -495,11 +183,16 @@ static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bo return aligned; } -int radeonfb_create(struct radeon_device *rdev, +static struct drm_fb_helper_funcs radeon_fb_helper_funcs = { + .gamma_set = radeon_crtc_fb_gamma_set, +}; + +int radeonfb_create(struct drm_device *dev, uint32_t fb_width, uint32_t fb_height, uint32_t surface_width, uint32_t surface_height, - struct 
radeon_framebuffer **rfb_p) + struct drm_framebuffer **fb_p) { + struct radeon_device *rdev = dev->dev_private; struct fb_info *info; struct radeon_fb_device *rfbdev; struct drm_framebuffer *fb = NULL; @@ -554,8 +247,8 @@ int radeonfb_create(struct radeon_device *rdev, list_add(&fb->filp_head, &rdev->ddev->mode_config.fb_kernel_list); + *fb_p = fb; rfb = to_radeon_framebuffer(fb); - *rfb_p = rfb; rdev->fbdev_rfb = rfb; rdev->fbdev_robj = robj; @@ -564,7 +257,14 @@ int radeonfb_create(struct radeon_device *rdev, ret = -ENOMEM; goto out_unref; } + rfbdev = info->par; + rfbdev->helper.funcs = &radeon_fb_helper_funcs; + rfbdev->helper.dev = dev; + ret = drm_fb_helper_init_crtc_count(&rfbdev->helper, 2, + RADEONFB_CONN_LIMIT); + if (ret) + goto out_unref; if (fb_tiled) radeon_object_check_tiling(robj, 0, 0); @@ -577,33 +277,19 @@ int radeonfb_create(struct radeon_device *rdev, memset_io(fbptr, 0, aligned_size); strcpy(info->fix.id, "radeondrmfb"); - info->fix.type = FB_TYPE_PACKED_PIXELS; - info->fix.visual = FB_VISUAL_TRUECOLOR; - info->fix.type_aux = 0; - info->fix.xpanstep = 1; /* doing it in hw */ - info->fix.ypanstep = 1; /* doing it in hw */ - info->fix.ywrapstep = 0; - info->fix.accel = FB_ACCEL_NONE; - info->fix.type_aux = 0; + + drm_fb_helper_fill_fix(info, fb->pitch); + info->flags = FBINFO_DEFAULT; info->fbops = &radeonfb_ops; - info->fix.line_length = fb->pitch; + tmp = fb_gpuaddr - rdev->mc.vram_location; info->fix.smem_start = rdev->mc.aper_base + tmp; info->fix.smem_len = size; info->screen_base = fbptr; info->screen_size = size; - info->pseudo_palette = fb->pseudo_palette; - info->var.xres_virtual = fb->width; - info->var.yres_virtual = fb->height; - info->var.bits_per_pixel = fb->bits_per_pixel; - info->var.xoffset = 0; - info->var.yoffset = 0; - info->var.activate = FB_ACTIVATE_NOW; - info->var.height = -1; - info->var.width = -1; - info->var.xres = fb_width; - info->var.yres = fb_height; + + drm_fb_helper_fill_var(info, fb, fb_width, fb_height); /* setup aperture base/size for vesafb takeover */ info->aperture_base = rdev->ddev->mode_config.fb_base; @@ -626,6 +312,9 @@ int radeonfb_create(struct radeon_device *rdev, DRM_INFO("fb depth is %d\n", fb->depth); DRM_INFO(" pitch is %d\n", fb->pitch); +#ifdef __BIG_ENDIAN + /* fill var sets defaults for this stuff - override + on big endian */ switch (fb->depth) { case 8: info->var.red.offset = 0; @@ -637,47 +326,6 @@ int radeonfb_create(struct radeon_device *rdev, info->var.transp.offset = 0; info->var.transp.length = 0; break; -#ifdef __LITTLE_ENDIAN - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; 
-#else case 24: info->var.red.offset = 8; info->var.green.offset = 16; @@ -699,9 +347,9 @@ int radeonfb_create(struct radeon_device *rdev, info->var.transp.length = 8; break; default: -#endif break; } +#endif fb->fbdev = info; rfbdev->rfb = rfb; @@ -726,145 +374,10 @@ out: return ret; } -static int radeonfb_single_fb_probe(struct radeon_device *rdev) -{ - struct drm_crtc *crtc; - struct drm_connector *connector; - unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; - unsigned int surface_width = 0, surface_height = 0; - int new_fb = 0; - int crtc_count = 0; - int ret, i, conn_count = 0; - struct radeon_framebuffer *rfb; - struct fb_info *info; - struct radeon_fb_device *rfbdev; - struct drm_mode_set *modeset = NULL; - - /* first up get a count of crtcs now in use and new min/maxes width/heights */ - list_for_each_entry(crtc, &rdev->ddev->mode_config.crtc_list, head) { - if (drm_helper_crtc_in_use(crtc)) { - if (crtc->desired_mode) { - if (crtc->desired_mode->hdisplay < fb_width) - fb_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay < fb_height) - fb_height = crtc->desired_mode->vdisplay; - - if (crtc->desired_mode->hdisplay > surface_width) - surface_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay > surface_height) - surface_height = crtc->desired_mode->vdisplay; - } - crtc_count++; - } - } - - if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { - /* hmm everyone went away - assume VGA cable just fell out - and will come back later. */ - return 0; - } - - /* do we have an fb already? */ - if (list_empty(&rdev->ddev->mode_config.fb_kernel_list)) { - /* create an fb if we don't have one */ - ret = radeonfb_create(rdev, fb_width, fb_height, surface_width, surface_height, &rfb); - if (ret) { - return -EINVAL; - } - new_fb = 1; - } else { - struct drm_framebuffer *fb; - fb = list_first_entry(&rdev->ddev->mode_config.fb_kernel_list, struct drm_framebuffer, filp_head); - rfb = to_radeon_framebuffer(fb); - - /* if someone hotplugs something bigger than we have already allocated, we are pwned. - As really we can't resize an fbdev that is in the wild currently due to fbdev - not really being designed for the lower layers moving stuff around under it. - - so in the grand style of things - punt. 
*/ - if ((fb->width < surface_width) || (fb->height < surface_height)) { - DRM_ERROR("Framebuffer not large enough to scale console onto.\n"); - return -EINVAL; - } - } - - info = rfb->base.fbdev; - rdev->fbdev_info = info; - rfbdev = info->par; - - crtc_count = 0; - /* okay we need to setup new connector sets in the crtcs */ - list_for_each_entry(crtc, &rdev->ddev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - modeset = &radeon_crtc->mode_set; - modeset->fb = &rfb->base; - conn_count = 0; - list_for_each_entry(connector, &rdev->ddev->mode_config.connector_list, head) { - if (connector->encoder) - if (connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count] = connector; - conn_count++; - if (conn_count > RADEONFB_CONN_LIMIT) - BUG(); - } - } - - for (i = conn_count; i < RADEONFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - - rfbdev->crtc_ids[crtc_count++] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) { - drm_mode_destroy(rdev->ddev, modeset->mode); - } - modeset->mode = drm_mode_duplicate(rdev->ddev, - modeset->crtc->desired_mode); - } - } - rfbdev->crtc_count = crtc_count; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else { - radeonfb_set_par(info); - } - printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - panic_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - printk(KERN_INFO "registered panic notifier\n"); - - return 0; -} - int radeonfb_probe(struct drm_device *dev) { int ret; - - /* something has changed in the lower levels of hell - deal with it - here */ - - /* two modes : a) 1 fb to rule all crtcs. - b) one fb per crtc. - two actions 1) new connected device - 2) device removed. - case a/1 : if the fb surface isn't big enough - resize the surface fb. - if the fb size isn't big enough - resize fb into surface. - if everything big enough configure the new crtc/etc. - case a/2 : undo the configuration - possibly resize down the fb to fit the new configuration. - case b/1 : see if it is on a new crtc - setup a new fb and add it. - case b/2 : teardown the new fb. 
- */ - ret = radeonfb_single_fb_probe(dev->dev_private); + ret = drm_fb_helper_single_fb_probe(dev, &radeonfb_create); return ret; } EXPORT_SYMBOL(radeonfb_probe); @@ -880,16 +393,17 @@ int radeonfb_remove(struct drm_device *dev, struct drm_framebuffer *fb) } info = fb->fbdev; if (info) { + struct radeon_fb_device *rfbdev = info->par; robj = rfb->obj->driver_private; unregister_framebuffer(info); radeon_object_kunmap(robj); radeon_object_unpin(robj); + drm_fb_helper_free(&rfbdev->helper); framebuffer_release(info); } printk(KERN_INFO "unregistered panic notifier\n"); - atomic_notifier_chain_unregister(&panic_notifier_list, &paniced); - memset(&panic_mode, 0, sizeof(struct drm_mode_set)); + return 0; } EXPORT_SYMBOL(radeonfb_remove); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 3b09a1f2d8f9..20e9509a7130 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -195,8 +195,6 @@ struct radeon_crtc { bool enabled; bool can_tile; uint32_t crtc_offset; - struct radeon_framebuffer *fbdev_fb; - struct drm_mode_set mode_set; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; int cursor_width; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index db92a83f8ca9..b0427a70fcbd 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -259,6 +259,8 @@ struct drm_framebuffer { void *fbdev; u32 pseudo_palette[17]; struct list_head filp_head; + /* if you are using the helper */ + void *helper_private; }; struct drm_property_blob { diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h new file mode 100644 index 000000000000..88fffbdfa26f --- /dev/null +++ b/include/drm/drm_fb_helper.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2006-2009 Red Hat Inc. + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM framebuffer helper functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ * + * Authors: + * Dave Airlie + * Jesse Barnes + */ +#ifndef DRM_FB_HELPER_H +#define DRM_FB_HELPER_H + +struct drm_fb_helper_crtc { + uint32_t crtc_id; + struct drm_mode_set mode_set; +}; + +struct drm_fb_helper_funcs { + void (*gamma_set)(struct drm_crtc *crtc, u16 red, u16 green, + u16 blue, int regno); +}; + +struct drm_fb_helper { + struct drm_framebuffer *fb; + struct drm_device *dev; + struct drm_display_mode *mode; + int crtc_count; + struct drm_fb_helper_crtc *crtc_info; + struct drm_fb_helper_funcs *funcs; + int conn_limit; + struct list_head kernel_fb_list; +}; + +int drm_fb_helper_single_fb_probe(struct drm_device *dev, + int (*fb_create)(struct drm_device *dev, + uint32_t fb_width, + uint32_t fb_height, + uint32_t surface_width, + uint32_t surface_height, + struct drm_framebuffer **fb_ptr)); +int drm_fb_helper_init_crtc_count(struct drm_fb_helper *helper, int crtc_count, + int max_conn); +void drm_fb_helper_free(struct drm_fb_helper *helper); +int drm_fb_helper_blank(int blank, struct fb_info *info); +int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info); +int drm_fb_helper_set_par(struct fb_info *info); +int drm_fb_helper_check_var(struct fb_var_screeninfo *var, + struct fb_info *info); +int drm_fb_helper_setcolreg(unsigned regno, + unsigned red, + unsigned green, + unsigned blue, + unsigned transp, + struct fb_info *info); + +void drm_fb_helper_restore(void); +void drm_fb_helper_fill_var(struct fb_info *info, struct drm_framebuffer *fb, + uint32_t fb_width, uint32_t fb_height); +void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch); + +#endif -- cgit v1.2.3 From 98a56ab382079f777e261e14512cbd4fb2107af4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Sep 2009 08:48:28 -0400 Subject: ext4: Fix spelling typo in the trace format for trace_ext4_da_writepages() Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 8d433c4e3709..15051d2d1219 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -243,7 +243,7 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic = wbc->range_cyclic; ), - TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", + TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, -- cgit v1.2.3 From b3a3ca8ca0c3c29abc5b2bfe94bb14f3f4590df9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Aug 2009 23:13:11 -0400 Subject: ext4: Add new tracepoint: trace_ext4_da_write_pages() Add a new tracepoint which shows the pages that will be written using write_cache_pages() by ext4_da_writepages(). 
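For illustration, once the event exists it can be driven from userspace
through the standard tracing debugfs interface. A minimal, hypothetical
consumer (assuming debugfs is mounted at /sys/kernel/debug; the paths
follow the usual events/<subsystem>/<event>/ layout, none of which is
added by this patch) might look like:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const char *en = "/sys/kernel/debug/tracing/events/"
				 "ext4/ext4_da_write_pages/enable";
		FILE *f = fopen(en, "w");
		char line[512];

		if (!f) {
			perror("enable");
			return 1;
		}
		fputs("1\n", f);	/* turn the event on */
		fclose(f);

		/* stream matching records as they are emitted */
		f = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
		if (!f) {
			perror("trace_pipe");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			if (strstr(line, "ext4_da_write_pages"))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}

Each record then reports the extent state (b_blocknr, b_size, b_state)
and the page range that the writeback pass is about to push.
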
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 +++++++++++++++ fs/ext4/inode.c | 13 +------------ include/trace/events/ext4.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 41a76e163b99..81014f4ed22d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -113,6 +113,21 @@ struct ext4_allocation_request { unsigned int flags; }; +/* + * For delayed allocation tracking + */ +struct mpage_da_data { + struct inode *inode; + sector_t b_blocknr; /* start block number of extent */ + size_t b_size; /* size of extent */ + unsigned long b_state; /* state of the extent */ + unsigned long first_page, next_page; /* extent of pages */ + struct writeback_control *wbc; + int io_done; + int pages_written; + int retval; +}; + /* * Special inodes numbers */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ff659e757578..17802a96af9f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1875,18 +1875,6 @@ static void ext4_da_page_release_reservation(struct page *page, * Delayed allocation stuff */ -struct mpage_da_data { - struct inode *inode; - sector_t b_blocknr; /* start block number of extent */ - size_t b_size; /* size of extent */ - unsigned long b_state; /* state of the extent */ - unsigned long first_page, next_page; /* extent of pages */ - struct writeback_control *wbc; - int io_done; - int pages_written; - int retval; -}; - /* * mpage_da_submit_io - walks through extent of pages and try to write * them with writepage() call back @@ -2863,6 +2851,7 @@ retry: mpd.io_done = 1; ret = MPAGE_DA_EXTENT_TAIL; } + trace_ext4_da_write_pages(inode, &mpd); wbc->nr_to_write -= mpd.pages_written; ext4_journal_stop(handle); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 15051d2d1219..dd43399288ea 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -251,6 +251,40 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic) ); +TRACE_EVENT(ext4_da_write_pages, + TP_PROTO(struct inode *inode, struct mpage_da_data *mpd), + + TP_ARGS(inode, mpd), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, b_blocknr ) + __field( __u32, b_size ) + __field( __u32, b_state ) + __field( unsigned long, first_page ) + __field( int, io_done ) + __field( int, pages_written ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->b_blocknr = mpd->b_blocknr; + __entry->b_size = mpd->b_size; + __entry->b_state = mpd->b_state; + __entry->first_page = mpd->first_page; + __entry->io_done = mpd->io_done; + __entry->pages_written = mpd->pages_written; + ), + + TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, + __entry->b_blocknr, __entry->b_size, + __entry->b_state, __entry->first_page, + __entry->io_done, __entry->pages_written) +); + TRACE_EVENT(ext4_da_writepages_result, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int ret, int pages_written), -- cgit v1.2.3 From 8aa84ad8d6c740a04386f599694609ee4998e82e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:05 +0200 Subject: [CPUFREQ] Introduce global, not per core: /sys/devices/system/cpu/cpufreq Currently everything in the cpufreq layer is per core based. This does not reflect reality, for example ondemand on conservative governors have global sysfs variables. 
Introduce a global cpufreq directory and add the kobject to the governor struct, so that governors can easily access it. The directory is initialized in the cpufreq_core_init initcall and thus will always be created if cpufreq is compiled in, even if no cpufreq driver is active later. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 9 ++++++++- include/linux/cpufreq.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bbd5c2164ab6..4da28444b235 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -686,6 +686,9 @@ static struct attribute *default_attrs[] = { NULL }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -1935,7 +1938,11 @@ static int __init cpufreq_core_init(void) per_cpu(policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } + + cpufreq_global_kobject = kobject_create_and_add("cpufreq", + &cpu_sysdev_class.kset.kobj); + BUG_ON(!cpufreq_global_kobject); + return 0; } - core_initcall(cpufreq_core_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 161042746afc..44717eb47639 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,6 +65,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, struct cpufreq_governor; +/* /sys/devices/system/cpu/cpufreq: entry point for global variables */ +extern struct kobject *cpufreq_global_kobject; + #define CPUFREQ_ETERNAL (-1) struct cpufreq_cpuinfo { unsigned int max_freq; @@ -274,6 +277,13 @@ struct freq_attr { ssize_t (*store)(struct cpufreq_policy *, const char *, size_t count); }; +struct global_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *a, struct attribute *b, + const char *c, size_t count); +}; /********************************************************************* * CPUFREQ 2.6. INTERFACE * -- cgit v1.2.3 From fa8a123855e20068204982596b8fafceb1a67f0b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 26 Aug 2009 13:13:37 +1000 Subject: drm/mm: add ability to dump mm lists via debugfs This adds code to the drm_mm to talk to debugfs, and adds support to radeon to add the VRAM and GTT mm lists to debugfs. I tested with spinlock debugging and it doesn't give out. Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_mm.c | 21 ++++++++++++++ drivers/gpu/drm/radeon/radeon_ttm.c | 56 +++++++++++++++++++++++++++++++++++++ include/drm/drm_mm.h | 4 +++ 3 files changed, 81 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 3e47869d6dae..c861d80fd779 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -44,6 +44,7 @@ #include "drmP.h" #include "drm_mm.h" #include +#include #define MM_UNUSED_TARGET 4 @@ -370,3 +371,23 @@ void drm_mm_takedown(struct drm_mm * mm) BUG_ON(mm->num_unused != 0); } EXPORT_SYMBOL(drm_mm_takedown); + +#if defined(CONFIG_DEBUG_FS) +int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm) +{ + struct drm_mm_node *entry; + int total_used = 0, total_free = 0, total = 0; + + list_for_each_entry(entry, &mm->ml_entry, ml_entry) { + seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: %s\n", entry->start, entry->start + entry->size, entry->size, entry->free ? 
"free" : "used"); + total += entry->size; + if (entry->free) + total_free += entry->size; + else + total_used += entry->size; + } + seq_printf(m, "total: %d, used %d free %d\n", total, total_free, total_used); + return 0; +} +EXPORT_SYMBOL(drm_mm_dump_table); +#endif diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 0a85e7b5d592..dc7a44274ea8 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -35,11 +35,14 @@ #include #include #include +#include #include "radeon_reg.h" #include "radeon.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) +static int radeon_ttm_debugfs_init(struct radeon_device *rdev); + static struct radeon_device *radeon_get_rdev(struct ttm_bo_device *bdev) { struct radeon_mman *mman; @@ -504,6 +507,12 @@ int radeon_ttm_init(struct radeon_device *rdev) if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; } + + r = radeon_ttm_debugfs_init(rdev); + if (r) { + DRM_ERROR("Failed to init debugfs\n"); + return r; + } return 0; } @@ -678,3 +687,50 @@ struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev) gtt->bound = false; return >t->backend; } + +#define RADEON_DEBUGFS_MEM_TYPES 2 + +static struct drm_info_list radeon_mem_types_list[RADEON_DEBUGFS_MEM_TYPES]; +static char radeon_mem_types_names[RADEON_DEBUGFS_MEM_TYPES][32]; + +#if defined(CONFIG_DEBUG_FS) +static int radeon_mm_dump_table(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_mm *mm = (struct drm_mm *)node->info_ent->data; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + int ret; + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); + ret = drm_mm_dump_table(m, mm); + spin_unlock(&glob->lru_lock); + return ret; +} +#endif + +static int radeon_ttm_debugfs_init(struct radeon_device *rdev) +{ + unsigned i; + +#if defined(CONFIG_DEBUG_FS) + for (i = 0; i < RADEON_DEBUGFS_MEM_TYPES; i++) { + if (i == 0) + sprintf(radeon_mem_types_names[i], "radeon_vram_mm"); + else + sprintf(radeon_mem_types_names[i], "radeon_gtt_mm"); + radeon_mem_types_list[i].name = radeon_mem_types_names[i]; + radeon_mem_types_list[i].show = &radeon_mm_dump_table; + radeon_mem_types_list[i].driver_features = 0; + if (i == 0) + radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_VRAM].manager; + else + radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_TT].manager; + + } + return radeon_debugfs_add_files(rdev, radeon_mem_types_list, RADEON_DEBUGFS_MEM_TYPES); + +#endif + return 0; +} diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index f8332073d277..bc5a87e8aeea 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -96,4 +96,8 @@ static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block) return block->mm; } +#ifdef CONFIG_DEBUG_FS +int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm); +#endif + #endif -- cgit v1.2.3 From a3a0544b2c84e1d7a2022b558ecf66d8c6a8dd93 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 31 Aug 2009 15:16:30 +1000 Subject: drm/kms: add explicit encoder disable function and detach harder. For shared tv-out and VGA encoders, we really need to know if the encoder is just being switched off temporarily in blanking or if we are really disabling it hard. Also we need to try harder to disconnect encoders from unused connectors so we can share more efficently. 
(shared encoders stuff is coming in radeon tv-out support) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 24 ++++++++++++++++++++---- include/drm/drm_crtc_helper.h | 2 ++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 205349ea1075..eea5e6c4099c 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -260,13 +260,27 @@ EXPORT_SYMBOL(drm_helper_crtc_in_use); void drm_helper_disable_unused_functions(struct drm_device *dev) { struct drm_encoder *encoder; + struct drm_connector *connector; struct drm_encoder_helper_funcs *encoder_funcs; struct drm_crtc *crtc; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (!connector->encoder) + continue; + if (connector->status == connector_status_disconnected) + connector->encoder = NULL; + } + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { encoder_funcs = encoder->helper_private; - if (!drm_helper_encoder_in_use(encoder)) - (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); + if (!drm_helper_encoder_in_use(encoder)) { + if (encoder_funcs->disable) + (*encoder_funcs->disable)(encoder); + else + (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); + } + /* disconnector encoder from any connector */ + encoder->crtc = NULL; } list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -411,7 +425,7 @@ static int drm_pick_crtcs(struct drm_device *dev, c = 0; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if ((connector->encoder->possible_crtcs & (1 << c)) == 0) { + if ((encoder->possible_crtcs & (1 << c)) == 0) { c++; continue; } @@ -496,8 +510,10 @@ static void drm_setup_crtcs(struct drm_device *dev) mode->name, crtc->base.id); crtc->desired_mode = mode; connector->encoder->crtc = crtc; - } else + } else { connector->encoder->crtc = NULL; + connector->encoder = NULL; + } i++; } diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index e44a4f87303c..4c8dacaf4f58 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -79,6 +79,8 @@ struct drm_encoder_helper_funcs { /* detect for DAC style encoders */ enum drm_connector_status (*detect)(struct drm_encoder *encoder, struct drm_connector *connector); + /* disable encoder when not in use - more explicit than dpms off */ + void (*disable)(struct drm_encoder *encoder); }; struct drm_connector_helper_funcs { -- cgit v1.2.3 From 6d703a81ad5fdd102334751ddacb053ecc6ff046 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 1 Sep 2009 17:52:57 -0700 Subject: ide: convert to ->proc_fops ->read_proc, ->write_proc are going away, ->proc_fops should be used instead. The only tricky place is IDENTIFY handling: if for some reason taskfile_lib_get_identify() fails, buffer _is_ changed and at least first byte is overwritten. Emulate old behaviour with returning that first byte to userspace and reporting length=1 despite overall -E. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- drivers/ide/ide-cd.c | 28 +++- drivers/ide/ide-disk_proc.c | 129 +++++++++++------ drivers/ide/ide-floppy_proc.c | 30 ++-- drivers/ide/ide-proc.c | 330 +++++++++++++++++++++++++++--------------- drivers/ide/ide-tape.c | 31 ++-- include/linux/ide.h | 24 +-- 6 files changed, 365 insertions(+), 207 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index ad0ab0c0a493..b79ca419d8d9 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1389,19 +1390,30 @@ static sector_t ide_cdrom_capacity(ide_drive_t *drive) return capacity * sectors_per_frame; } -static int proc_idecd_read_capacity(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int idecd_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = data; - int len; + ide_drive_t *drive = m->private; - len = sprintf(page, "%llu\n", (long long)ide_cdrom_capacity(drive)); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)ide_cdrom_capacity(drive)); + return 0; +} + +static int idecd_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idecd_capacity_proc_show, PDE(inode)->data); } +static const struct file_operations idecd_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idecd_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ide_proc_entry_t idecd_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_idecd_read_capacity, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &idecd_capacity_proc_fops }, + {} }; static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive) diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c index 19f263bf0a9e..60b0590ccc9c 100644 --- a/drivers/ide/ide-disk_proc.c +++ b/drivers/ide/ide-disk_proc.c @@ -1,5 +1,6 @@ #include #include +#include #include "ide-disk.h" @@ -37,77 +38,117 @@ static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd) return ide_raw_taskfile(drive, &cmd, buf, 1); } -static int proc_idedisk_read_cache - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idedisk_cache_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len; + ide_drive_t *drive = (ide_drive_t *) m->private; if (drive->dev_flags & IDE_DFLAG_ID_READ) - len = sprintf(out, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); + seq_printf(m, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); else - len = sprintf(out, "(none)\n"); + seq_printf(m, "(none)\n"); + return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_cache_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_cache_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_capacity - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations idedisk_cache_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_cache_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int idedisk_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t*drive = (ide_drive_t *)data; - int len; + ide_drive_t*drive = (ide_drive_t *)m->private; - len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive)); + seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive)); + return 0; 
+} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_capacity_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_smart(char *page, char **start, off_t off, - int count, int *eof, void *data, u8 sub_cmd) +static const struct file_operations idedisk_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 sub_cmd) { - ide_drive_t *drive = (ide_drive_t *)data; - int len = 0, i = 0; + u8 *buf; + + buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; (void)smart_enable(drive); - if (get_smart_data(drive, page, sub_cmd) == 0) { - unsigned short *val = (unsigned short *) page; - char *out = (char *)val + SECTOR_SIZE; - - page = out; - do { - out += sprintf(out, "%04x%c", le16_to_cpu(*val), - (++i & 7) ? ' ' : '\n'); - val += 1; - } while (i < SECTOR_SIZE / 2); - len = out - page; + if (get_smart_data(drive, buf, sub_cmd) == 0) { + __le16 *val = (__le16 *)buf; + int i; + + for (i = 0; i < SECTOR_SIZE / 2; i++) { + seq_printf(m, "%04x%c", le16_to_cpu(val[i]), + (i % 8) == 7 ? '\n' : ' '); + } } + kfree(buf); + return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_sv_proc_show(struct seq_file *m, void *v) +{ + return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES); } -static int proc_idedisk_read_sv - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idedisk_sv_proc_open(struct inode *inode, struct file *file) { - return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_VALUES); + return single_open(file, idedisk_sv_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_st - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations idedisk_sv_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_sv_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int idedisk_st_proc_show(struct seq_file *m, void *v) { - return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_THRESHOLDS); + return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS); } +static int idedisk_st_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_st_proc_show, PDE(inode)->data); +} + +static const struct file_operations idedisk_st_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_st_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + ide_proc_entry_t ide_disk_proc[] = { - { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL }, - { "capacity", S_IFREG|S_IRUGO, proc_idedisk_read_capacity, NULL }, - { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, - { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_sv, NULL }, - { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_st, NULL }, - { NULL, 0, NULL, NULL } + { "cache", S_IFREG|S_IRUGO, &idedisk_cache_proc_fops }, + { "capacity", S_IFREG|S_IRUGO, &idedisk_capacity_proc_fops }, + { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + { "smart_values", S_IFREG|S_IRUSR, &idedisk_sv_proc_fops }, + { "smart_thresholds", S_IFREG|S_IRUSR, &idedisk_st_proc_fops }, + {} }; ide_devset_rw_field(bios_cyl, bios_cyl); diff 
--git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c index fcd4d8153df5..d711d9b883de 100644 --- a/drivers/ide/ide-floppy_proc.c +++ b/drivers/ide/ide-floppy_proc.c @@ -1,22 +1,34 @@ #include #include +#include #include "ide-floppy.h" -static int proc_idefloppy_read_capacity(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int idefloppy_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t*drive = (ide_drive_t *)data; - int len; + ide_drive_t*drive = (ide_drive_t *)m->private; - len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive)); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive)); + return 0; } +static int idefloppy_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idefloppy_capacity_proc_show, PDE(inode)->data); +} + +static const struct file_operations idefloppy_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idefloppy_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + ide_proc_entry_t ide_floppy_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_idefloppy_read_capacity, NULL }, - { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &idefloppy_capacity_proc_fops }, + { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + {} }; ide_devset_rw_field(bios_cyl, bios_cyl); diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 021de41655e6..28d09a5d8450 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -30,11 +30,9 @@ static struct proc_dir_entry *proc_ide_root; -static int proc_ide_read_imodel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_imodel_proc_show(struct seq_file *m, void *v) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + ide_hwif_t *hwif = (ide_hwif_t *) m->private; const char *name; switch (hwif->chipset) { @@ -53,63 +51,108 @@ static int proc_ide_read_imodel case ide_acorn: name = "acorn"; break; default: name = "(unknown)"; break; } - len = sprintf(page, "%s\n", name); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%s\n", name); + return 0; } -static int proc_ide_read_mate - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_imodel_proc_open(struct inode *inode, struct file *file) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + return single_open(file, ide_imodel_proc_show, PDE(inode)->data); +} + +static const struct file_operations ide_imodel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_imodel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_mate_proc_show(struct seq_file *m, void *v) +{ + ide_hwif_t *hwif = (ide_hwif_t *) m->private; if (hwif && hwif->mate) - len = sprintf(page, "%s\n", hwif->mate->name); + seq_printf(m, "%s\n", hwif->mate->name); else - len = sprintf(page, "(none)\n"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "(none)\n"); + return 0; +} + +static int ide_mate_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_mate_proc_show, PDE(inode)->data); } -static int proc_ide_read_channel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_mate_proc_fops = { + .owner = THIS_MODULE, + .open = ide_mate_proc_open, + .read = 
seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_channel_proc_show(struct seq_file *m, void *v) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + ide_hwif_t *hwif = (ide_hwif_t *) m->private; - page[0] = hwif->channel ? '1' : '0'; - page[1] = '\n'; - len = 2; - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%c\n", hwif->channel ? '1' : '0'); + return 0; } -static int proc_ide_read_identify - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_channel_proc_open(struct inode *inode, struct file *file) { - ide_drive_t *drive = (ide_drive_t *)data; - int len = 0, i = 0; - int err = 0; + return single_open(file, ide_channel_proc_show, PDE(inode)->data); +} - len = sprintf(page, "\n"); +static const struct file_operations ide_channel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_channel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; - if (drive) { - __le16 *val = (__le16 *)page; +static int ide_identify_proc_show(struct seq_file *m, void *v) +{ + ide_drive_t *drive = (ide_drive_t *)m->private; + u8 *buf; - err = taskfile_lib_get_identify(drive, page); - if (!err) { - char *out = (char *)page + SECTOR_SIZE; + if (!drive) { + seq_putc(m, '\n'); + return 0; + } - page = out; - do { - out += sprintf(out, "%04x%c", - le16_to_cpup(val), (++i & 7) ? ' ' : '\n'); - val += 1; - } while (i < SECTOR_SIZE / 2); - len = out - page; + buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (taskfile_lib_get_identify(drive, buf) == 0) { + __le16 *val = (__le16 *)buf; + int i; + + for (i = 0; i < SECTOR_SIZE / 2; i++) { + seq_printf(m, "%04x%c", le16_to_cpu(val[i]), + (i % 8) == 7 ? '\n' : ' '); } - } - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + } else + seq_putc(m, buf[0]); + kfree(buf); + return 0; +} + +static int ide_identify_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_identify_proc_show, PDE(inode)->data); } +static const struct file_operations ide_identify_proc_fops = { + .owner = THIS_MODULE, + .open = ide_identify_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /** * ide_find_setting - find a specific setting * @st: setting table pointer @@ -240,22 +283,20 @@ static void proc_ide_settings_warn(void) warned = 1; } -static int proc_ide_read_settings - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_settings_proc_show(struct seq_file *m, void *v) { const struct ide_proc_devset *setting, *g, *d; const struct ide_devset *ds; - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len, rc, mul_factor, div_factor; + ide_drive_t *drive = (ide_drive_t *) m->private; + int rc, mul_factor, div_factor; proc_ide_settings_warn(); mutex_lock(&ide_setting_mtx); g = ide_generic_settings; d = drive->settings; - out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); - out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); + seq_printf(m, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); + seq_printf(m, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); while (g->name || (d && d->name)) { /* read settings in the alphabetical order */ if (g->name && d && d->name) { @@ -269,31 +310,35 @@ static int proc_ide_read_settings setting = g++; mul_factor = setting->mulf ? setting->mulf(drive) : 1; div_factor = setting->divf ? 
setting->divf(drive) : 1; - out += sprintf(out, "%-24s", setting->name); + seq_printf(m, "%-24s", setting->name); rc = ide_read_setting(drive, setting); if (rc >= 0) - out += sprintf(out, "%-16d", rc * mul_factor / div_factor); + seq_printf(m, "%-16d", rc * mul_factor / div_factor); else - out += sprintf(out, "%-16s", "write-only"); - out += sprintf(out, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); + seq_printf(m, "%-16s", "write-only"); + seq_printf(m, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); ds = setting->setting; if (ds->get) - out += sprintf(out, "r"); + seq_printf(m, "r"); if (ds->set) - out += sprintf(out, "w"); - out += sprintf(out, "\n"); + seq_printf(m, "w"); + seq_printf(m, "\n"); } - len = out - page; mutex_unlock(&ide_setting_mtx); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + return 0; +} + +static int ide_settings_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_settings_proc_show, PDE(inode)->data); } #define MAX_LEN 30 -static int proc_ide_write_settings(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t ide_settings_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; char name[MAX_LEN + 1]; int for_real = 0, mul_factor, div_factor; unsigned long n; @@ -388,63 +433,104 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, return count; parse_error: free_page((unsigned long)buf); - printk("proc_ide_write_settings(): parse error\n"); + printk("%s(): parse error\n", __func__); return -EINVAL; } -int proc_ide_read_capacity - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_settings_proc_fops = { + .owner = THIS_MODULE, + .open = ide_settings_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ide_settings_proc_write, +}; + +static int ide_capacity_proc_show(struct seq_file *m, void *v) { - int len = sprintf(page, "%llu\n", (long long)0x7fffffff); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)0x7fffffff); + return 0; } -EXPORT_SYMBOL_GPL(proc_ide_read_capacity); +static int ide_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_capacity_proc_show, NULL); +} -int proc_ide_read_geometry - (char *page, char **start, off_t off, int count, int *eof, void *data) +const struct file_operations ide_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = ide_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL_GPL(ide_capacity_proc_fops); + +static int ide_geometry_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len; + ide_drive_t *drive = (ide_drive_t *) m->private; - out += sprintf(out, "physical %d/%d/%d\n", + seq_printf(m, "physical %d/%d/%d\n", drive->cyl, drive->head, drive->sect); - out += sprintf(out, "logical %d/%d/%d\n", + seq_printf(m, "logical %d/%d/%d\n", drive->bios_cyl, drive->bios_head, drive->bios_sect); + return 0; +} - len = out - page; - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int ide_geometry_proc_open(struct 
inode *inode, struct file *file) +{ + return single_open(file, ide_geometry_proc_show, PDE(inode)->data); } -EXPORT_SYMBOL(proc_ide_read_geometry); +const struct file_operations ide_geometry_proc_fops = { + .owner = THIS_MODULE, + .open = ide_geometry_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL(ide_geometry_proc_fops); -static int proc_ide_read_dmodel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_dmodel_proc_show(struct seq_file *seq, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) seq->private; char *m = (char *)&drive->id[ATA_ID_PROD]; - int len; - len = sprintf(page, "%.40s\n", m[0] ? m : "(none)"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(seq, "%.40s\n", m[0] ? m : "(none)"); + return 0; +} + +static int ide_dmodel_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_dmodel_proc_show, PDE(inode)->data); } -static int proc_ide_read_driver - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_dmodel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_dmodel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_driver_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *)data; + ide_drive_t *drive = (ide_drive_t *)m->private; struct device *dev = &drive->gendev; struct ide_driver *ide_drv; - int len; if (dev->driver) { ide_drv = to_ide_driver(dev->driver); - len = sprintf(page, "%s version %s\n", + seq_printf(m, "%s version %s\n", dev->driver->name, ide_drv->version); } else - len = sprintf(page, "ide-default version 0.9.newide\n"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "ide-default version 0.9.newide\n"); + return 0; +} + +static int ide_driver_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_driver_proc_show, PDE(inode)->data); } static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) @@ -474,10 +560,10 @@ static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) return ret; } -static int proc_ide_write_driver - (struct file *file, const char __user *buffer, unsigned long count, void *data) +static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; char name[32]; if (!capable(CAP_SYS_ADMIN)) @@ -492,12 +578,19 @@ static int proc_ide_write_driver return count; } -static int proc_ide_read_media - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_driver_proc_fops = { + .owner = THIS_MODULE, + .open = ide_driver_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ide_driver_proc_write, +}; + +static int ide_media_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) m->private; const char *media; - int len; switch (drive->media) { case ide_disk: media = "disk\n"; break; @@ -507,20 +600,30 @@ static int proc_ide_read_media case ide_optical: media = "optical\n"; break; default: media = "UNKNOWN\n"; break; } - strcpy(page, media); - len = strlen(media); - PROC_IDE_READ_RETURN(page, start, off, count, 
eof, len);
+	seq_puts(m, media);
+	return 0;
+}
+
+static int ide_media_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ide_media_proc_show, PDE(inode)->data);
 }

+static const struct file_operations ide_media_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ide_media_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static ide_proc_entry_t generic_drive_entries[] = {
-	{ "driver",	S_IFREG|S_IRUGO,	 proc_ide_read_driver,
-						 proc_ide_write_driver },
-	{ "identify",	S_IFREG|S_IRUSR,	 proc_ide_read_identify, NULL },
-	{ "media",	S_IFREG|S_IRUGO,	 proc_ide_read_media,	 NULL },
-	{ "model",	S_IFREG|S_IRUGO,	 proc_ide_read_dmodel,	 NULL },
-	{ "settings",	S_IFREG|S_IRUSR|S_IWUSR, proc_ide_read_settings,
-						 proc_ide_write_settings },
-	{ NULL,	0, NULL, NULL }
+	{ "driver",	S_IFREG|S_IRUGO,	 &ide_driver_proc_fops	},
+	{ "identify",	S_IFREG|S_IRUSR,	 &ide_identify_proc_fops},
+	{ "media",	S_IFREG|S_IRUGO,	 &ide_media_proc_fops	},
+	{ "model",	S_IFREG|S_IRUGO,	 &ide_dmodel_proc_fops	},
+	{ "settings",	S_IFREG|S_IRUSR|S_IWUSR, &ide_settings_proc_fops},
+	{}
 };

 static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data)
@@ -530,11 +633,8 @@ static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p
 	if (!dir || !p)
 		return;
 	while (p->name != NULL) {
-		ent = create_proc_entry(p->name, p->mode, dir);
+		ent = proc_create_data(p->name, p->mode, dir, p->proc_fops, data);
 		if (!ent) return;
-		ent->data = data;
-		ent->read_proc = p->read_proc;
-		ent->write_proc = p->write_proc;
 		p++;
 	}
 }
@@ -617,10 +717,10 @@ void ide_proc_unregister_device(ide_drive_t *drive)
 }

 static ide_proc_entry_t hwif_entries[] = {
-	{ "channel",	S_IFREG|S_IRUGO,	proc_ide_read_channel,	NULL },
-	{ "mate",	S_IFREG|S_IRUGO,	proc_ide_read_mate,	NULL },
-	{ "model",	S_IFREG|S_IRUGO,	proc_ide_read_imodel,	NULL },
-	{ NULL,	0, NULL, NULL }
+	{ "channel",	S_IFREG|S_IRUGO,	&ide_channel_proc_fops	},
+	{ "mate",	S_IFREG|S_IRUGO,	&ide_mate_proc_fops	},
+	{ "model",	S_IFREG|S_IRUGO,	&ide_imodel_proc_fops	},
+	{}
 };

 void ide_proc_register_port(ide_hwif_t *hwif)
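The hunk above is the heart of the conversion: each table entry now carries a file_operations pointer, and the per-drive pointer travels through proc_create_data() instead of being poked into the entry after creation. A minimal, self-contained sketch of the same table-driven registration; my_proc_entry and my_add_proc_entries are hypothetical names, not from the patch:

#include <linux/fs.h>
#include <linux/proc_fs.h>

struct my_proc_entry {
	const char			*name;
	mode_t				mode;
	const struct file_operations	*proc_fops;
};

/* walk a {}-terminated table, binding the same 'data' to every entry */
static void my_add_proc_entries(struct proc_dir_entry *dir,
				const struct my_proc_entry *p, void *data)
{
	for (; p->name; p++)
		if (!proc_create_data(p->name, p->mode, dir,
				      p->proc_fops, data))
			return;
}

The zeroed sentinel ({}) works because an empty initializer leaves name NULL, so the loop condition doubles as the terminator test; the old tables needed an explicit { NULL, 0, NULL, NULL } entry.

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 7b2032bc357b..9d6f62baac27 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <linux/seq_file.h>
 #include
 #include
 #include
@@ -1816,22 +1817,32 @@ static void ide_tape_release(struct device *dev)
 }

 #ifdef CONFIG_IDE_PROC_FS
-static int proc_idetape_read_name
-	(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int idetape_name_proc_show(struct seq_file *m, void *v)
 {
-	ide_drive_t	*drive = (ide_drive_t *) data;
+	ide_drive_t	*drive = (ide_drive_t *) m->private;
 	idetape_tape_t	*tape = drive->driver_data;
-	char		*out = page;
-	int		len;

-	len = sprintf(out, "%s\n", tape->name);
-	PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
+	seq_printf(m, "%s\n", tape->name);
+	return 0;
+}
+
+static int idetape_name_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, idetape_name_proc_show, PDE(inode)->data);
 }

+static const struct file_operations idetape_name_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= idetape_name_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static ide_proc_entry_t idetape_proc[] = {
-	{ "capacity",	S_IFREG|S_IRUGO,	proc_ide_read_capacity,	NULL },
-	{ "name",	S_IFREG|S_IRUGO,	proc_idetape_read_name,	NULL },
-	{ NULL, 0, NULL, NULL }
+	{ "capacity",	S_IFREG|S_IRUGO,	&ide_capacity_proc_fops	},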
+	{ "name",	S_IFREG|S_IRUGO,	&idetape_name_proc_fops	},
+	{}
 };

 static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 803c1ae31237..e4135d6e0556 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -919,8 +919,7 @@ __IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL)
 typedef struct {
 	const char	*name;
 	mode_t		mode;
-	read_proc_t	*read_proc;
-	write_proc_t	*write_proc;
+	const struct file_operations *proc_fops;
 } ide_proc_entry_t;

 void proc_ide_create(void);
@@ -932,24 +931,8 @@ void ide_proc_unregister_port(ide_hwif_t *);
 void ide_proc_register_driver(ide_drive_t *, struct ide_driver *);
 void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *);

-read_proc_t proc_ide_read_capacity;
-read_proc_t proc_ide_read_geometry;
-
-/*
- * Standard exit stuff:
- */
-#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) \
-{ \
-	len -= off; \
-	if (len < count) { \
-		*eof = 1; \
-		if (len <= 0) \
-			return 0; \
-	} else \
-		len = count; \
-	*start = page + off; \
-	return len; \
-}
+extern const struct file_operations ide_capacity_proc_fops;
+extern const struct file_operations ide_geometry_proc_fops;
 #else
 static inline void proc_ide_create(void) { ; }
 static inline void proc_ide_destroy(void) { ; }
@@ -961,7 +944,6 @@ static inline void ide_proc_register_driver(ide_drive_t *drive,
 					    struct ide_driver *driver) { ; }
 static inline void ide_proc_unregister_driver(ide_drive_t *drive,
 					      struct ide_driver *driver) { ; }
-#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
 #endif

 enum {
-- cgit v1.2.3
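Every read-side conversion in the patch above follows the same single_open() idiom: a show() callback that prints the whole file into a seq_file, an open() that binds the /proc entry's data pointer, and a shared file_operations table. A minimal, self-contained sketch of that idiom; my_show, my_open, my_proc_fops, and my_value are hypothetical names, not from the patch:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int my_value = 42;

/* the show() callback emits the entire file contents */
static int my_show(struct seq_file *m, void *v)
{
	/* m->private is whatever pointer was handed to single_open() */
	seq_printf(m, "%d\n", *(int *)m->private);
	return 0;
}

static int my_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data is the pointer passed to proc_create_data() */
	return single_open(file, my_show, PDE(inode)->data);
}

static const struct file_operations my_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= my_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/* registration, typically at init time:
 *	proc_create_data("my_entry", S_IFREG | S_IRUGO, parent_dir,
 *			 &my_proc_fops, &my_value);
 */

seq_read/seq_lseek/single_release handle the partial-read bookkeeping that the old read_proc handlers did by hand via the PROC_IDE_READ_RETURN macro deleted above; writable entries such as "settings" and "driver" simply add a conventional ->write method to the same table.

From 69575d388603365f2afbf4166df93152df59b165 Mon Sep 17 00:00:00 2001
From: Shane Wang
Date: Tue, 1 Sep 2009 18:25:07 -0700
Subject: x86, intel_txt: clean up the impact on generic code, unbreak non-x86

Move tboot.h from asm to linux to fix the build errors of the intel_txt
patch on non-x86 platforms. Remove the tboot code from the generic code
in init/main.c and kernel/cpu.c.

Signed-off-by: Shane Wang
Signed-off-by: H.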
Peter Anvin --- arch/x86/Kconfig | 4 + arch/x86/include/asm/tboot.h | 197 ------------------------------------------ arch/x86/kernel/reboot.c | 3 +- arch/x86/kernel/setup.c | 3 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tboot.c | 58 ++++++++++--- drivers/acpi/acpica/hwsleep.c | 2 +- drivers/pci/dmar.c | 2 +- drivers/pci/intel-iommu.c | 2 +- include/linux/tboot.h | 162 ++++++++++++++++++++++++++++++++++ init/main.c | 3 - kernel/cpu.c | 6 +- security/Kconfig | 2 +- 13 files changed, 221 insertions(+), 225 deletions(-) delete mode 100644 arch/x86/include/asm/tboot.h create mode 100644 include/linux/tboot.h (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6b0f8b..b66f2102c35d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -178,6 +178,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_INTEL_TXT + def_bool y + depends on EXPERIMENTAL && DMAR && ACPI + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool diff --git a/arch/x86/include/asm/tboot.h b/arch/x86/include/asm/tboot.h deleted file mode 100644 index b13929d4e5f4..000000000000 --- a/arch/x86/include/asm/tboot.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * tboot.h: shared data structure with tboot and kernel and functions - * used by kernel for runtime support of Intel(R) Trusted - * Execution Technology - * - * Copyright (c) 2006-2009, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
- * - */ - -#ifndef _ASM_TBOOT_H -#define _ASM_TBOOT_H - -#include - -/* these must have the values from 0-5 in this order */ -enum { - TB_SHUTDOWN_REBOOT = 0, - TB_SHUTDOWN_S5, - TB_SHUTDOWN_S4, - TB_SHUTDOWN_S3, - TB_SHUTDOWN_HALT, - TB_SHUTDOWN_WFS -}; - -#ifdef CONFIG_INTEL_TXT - -/* used to communicate between tboot and the launched kernel */ - -#define TB_KEY_SIZE 64 /* 512 bits */ - -#define MAX_TB_MAC_REGIONS 32 - -struct tboot_mac_region { - u64 start; /* must be 64 byte -aligned */ - u32 size; /* must be 64 byte -granular */ -} __packed; - -/* GAS - Generic Address Structure (ACPI 2.0+) */ -struct tboot_acpi_generic_address { - u8 space_id; - u8 bit_width; - u8 bit_offset; - u8 access_width; - u64 address; -} __packed; - -/* - * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec - * (http://www.acpi.info/) - */ -struct tboot_acpi_sleep_info { - struct tboot_acpi_generic_address pm1a_cnt_blk; - struct tboot_acpi_generic_address pm1b_cnt_blk; - struct tboot_acpi_generic_address pm1a_evt_blk; - struct tboot_acpi_generic_address pm1b_evt_blk; - u16 pm1a_cnt_val; - u16 pm1b_cnt_val; - u64 wakeup_vector; - u32 vector_width; - u64 kernel_s3_resume_vector; -} __packed; - -/* - * shared memory page used for communication between tboot and kernel - */ -struct tboot { - /* - * version 3+ fields: - */ - - /* TBOOT_UUID */ - u8 uuid[16]; - - /* version number: 5 is current */ - u32 version; - - /* physical addr of tb_log_t log */ - u32 log_addr; - - /* - * physical addr of entry point for tboot shutdown and - * type of shutdown (TB_SHUTDOWN_*) being requested - */ - u32 shutdown_entry; - u32 shutdown_type; - - /* kernel-specified ACPI info for Sx shutdown */ - struct tboot_acpi_sleep_info acpi_sinfo; - - /* tboot location in memory (physical) */ - u32 tboot_base; - u32 tboot_size; - - /* memory regions (phys addrs) for tboot to MAC on S3 */ - u8 num_mac_regions; - struct tboot_mac_region mac_regions[MAX_TB_MAC_REGIONS]; - - - /* - * version 4+ fields: - */ - - /* symmetric key for use by kernel; will be encrypted on S3 */ - u8 s3_key[TB_KEY_SIZE]; - - - /* - * version 5+ fields: - */ - - /* used to 4byte-align num_in_wfs */ - u8 reserved_align[3]; - - /* number of processors in wait-for-SIPI */ - u32 num_in_wfs; -} __packed; - -/* - * UUID for tboot data struct to facilitate matching - * defined as {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} by tboot, which is - * represented as {} in the char array used here - */ -#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\ - 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8} - -extern struct tboot *tboot; - -static inline int tboot_enabled(void) -{ - return tboot != NULL; -} - -extern void tboot_probe(void); -extern void tboot_create_trampoline(void); -extern void tboot_shutdown(u32 shutdown_type); -extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); -extern int tboot_wait_for_aps(int num_aps); -extern struct acpi_table_header *tboot_get_dmar_table( - struct acpi_table_header *dmar_tbl); -extern int tboot_force_iommu(void); - -#else /* CONFIG_INTEL_TXT */ - -static inline int tboot_enabled(void) -{ - return 0; -} - -static inline void tboot_probe(void) -{ -} - -static inline void tboot_create_trampoline(void) -{ -} - -static inline void tboot_shutdown(u32 shutdown_type) -{ -} - -static inline void tboot_sleep(u8 sleep_state, u32 pm1a_control, - u32 pm1b_control) -{ -} - -static inline int tboot_wait_for_aps(int num_aps) -{ - return 0; -} - -static inline struct acpi_table_header *tboot_get_dmar_table( - 
struct acpi_table_header *dmar_tbl)
-{
-	return dmar_tbl;
-}
-
-static inline int tboot_force_iommu(void)
-{
-	return 0;
-}
-
-#endif /* !CONFIG_INTEL_TXT */
-
-#endif /* _ASM_TBOOT_H */
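The shape of the header deleted above (and recreated as include/linux/tboot.h) is what lets init/main.c and kernel/cpu.c drop their tboot knowledge: every entry point degrades to a static inline no-op when CONFIG_INTEL_TXT is off, so generic callers need no #ifdef guards. A minimal sketch of that stub-header pattern; CONFIG_MY_FEATURE, my_feature_enabled(), and my_feature_shutdown() are hypothetical names, not from the patch:

/* linux/my_feature.h -- illustrative only */
#ifndef _LINUX_MY_FEATURE_H
#define _LINUX_MY_FEATURE_H

#include <linux/types.h>

#ifdef CONFIG_MY_FEATURE

extern int my_feature_enabled(void);
extern void my_feature_shutdown(u32 shutdown_type);

#else /* !CONFIG_MY_FEATURE */

/* no-op stubs: generic code calls these unconditionally and the
 * compiler discards them when the feature is configured out */
static inline int my_feature_enabled(void)
{
	return 0;
}

static inline void my_feature_shutdown(u32 shutdown_type)
{
}

#endif /* CONFIG_MY_FEATURE */

#endif /* _LINUX_MY_FEATURE_H */

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9de01c5d9794..18ce5c04242a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <linux/tboot.h>
 #include
 #include
 #include
@@ -24,8 +25,6 @@
 # include
 #endif

-#include <asm/tboot.h>
-
 /*
  * Power off function, if any
  */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 80d6e9e32483..6ce0d6f38f7f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -66,6 +66,7 @@
 #include
 #include
+#include <linux/tboot.h>
 #include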