From 0c6a89ba640d28e1dcd7fd1a217d2cfb92ae4953 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 29 Jul 2007 23:00:46 +0900 Subject: [SCSI] bsg: update sg_io_v4 structure This updates sg_io_v4 structure (based on Doug's RFC, release 1.3). The major changes are: - add dout_resid field - increase tag size to 64 bits to comply with SAM-4 and SRP - add dout_iovec_count and din_iovec_count dout_iovec_count and din_iovec_count aren't supported now. I'm not sure whether they will be supported or not but they were added for the possible future changes. Signed-off-by: FUJITA Tomonori Signed-off-by: James Bottomley --- include/linux/bsg.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 102dc096e1cb..60e377b520f8 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -15,14 +15,18 @@ struct sg_io_v4 { __u32 request_len; /* [i] in bytes */ __u64 request; /* [i], [*i] {SCSI: cdb} */ + __u64 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ __u32 request_attr; /* [i] {SCSI: task attribute} */ - __u32 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ __u32 request_priority; /* [i] {SCSI: task priority} */ + __u32 request_extra; /* [i] {spare, for padding} */ __u32 max_response_len; /* [i] in bytes */ __u64 response; /* [i], [*o] {SCSI: (auto)sense data} */ - /* "din_" for data in (from device); "dout_" for data out (to device) */ + /* "dout_": data out (to device); "din_": data in (from device) */ + __u32 dout_iovec_count; /* [i] 0 -> "flat" dout transfer else + dout_xfer points to array of iovec */ __u32 dout_xfer_len; /* [i] bytes to be transferred to device */ + __u32 din_iovec_count; /* [i] 0 -> "flat" din transfer */ __u32 din_xfer_len; /* [i] bytes to be transferred from device */ __u64 dout_xferp; /* [i], [*i] */ __u64 din_xferp; /* [i], [*o] */ @@ -39,8 +43,9 @@ struct sg_io_v4 { __u32 info; /* [o] additional information */ __u32 duration; 
/* [o] time to complete, in milliseconds */ __u32 response_len; /* [o] bytes of response actually written */ - __s32 din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ - __u32 generated_tag; /* [o] {SCSI: task tag that transport chose} */ + __s32 din_resid; /* [o] din_xfer_len - actual_din_xfer_len */ + __s32 dout_resid; /* [o] dout_xfer_len - actual_dout_xfer_len */ + __u64 generated_tag; /* [o] {SCSI: transport generated task tag} */ __u32 spare_out; /* [o] */ __u32 padding; -- cgit v1.2.3 From 0a5fcb9cf8e5c3fabaab1c20668f58fe85d7c70d Mon Sep 17 00:00:00 2001 From: "sebastian@breakpoint.cc" Date: Thu, 26 Jul 2007 23:21:32 +0200 Subject: sctp: move global declaration to header file. sctp_chunk_cachep & sctp_bucket_cachep is used module global, so move it to a header file. Signed-off-by: Sebastian Siewior Signed-off-by: Vlad Yasevich --- include/net/sctp/sctp.h | 10 ++++++++++ net/sctp/sm_make_chunk.c | 2 -- net/sctp/socket.c | 2 -- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 16baef4dab7e..d529045c1679 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -189,6 +189,16 @@ int sctp_assocs_proc_init(void); void sctp_assocs_proc_exit(void); +/* + * Module global variables + */ + + /* + * sctp/protocol.c + */ +extern struct kmem_cache *sctp_chunk_cachep __read_mostly; +extern struct kmem_cache *sctp_bucket_cachep __read_mostly; + /* * Section: Macros, externs, and inlines */ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8d18f570c2e6..ad02311dcd83 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -65,8 +65,6 @@ #include #include -extern struct kmem_cache *sctp_chunk_cachep; - SCTP_STATIC struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f8bacc898e12..f8de0eb235d9 100644 --- a/net/sctp/socket.c 
+++ b/net/sctp/socket.c @@ -107,8 +107,6 @@ static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; -extern struct kmem_cache *sctp_bucket_cachep; - /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { -- cgit v1.2.3 From 5ca95c48f1bd006d1aafe2f8bf1a859262d6d7b1 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 3 Aug 2007 10:16:29 +0900 Subject: sh: fix cf support on r2d boards This patch makes sure cf support is enabled on R2D-PLUS but disabled on R2D-1. Without this fix R2D-1 boards hang on bootup. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/renesas/rts7751r2d/setup.c | 9 ++++++++- include/asm-sh/rts7751r2d.h | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/sh/boards/renesas/rts7751r2d/setup.c b/arch/sh/boards/renesas/rts7751r2d/setup.c index e165d85c03b5..6f7029d33241 100644 --- a/arch/sh/boards/renesas/rts7751r2d/setup.c +++ b/arch/sh/boards/renesas/rts7751r2d/setup.c @@ -140,12 +140,19 @@ static struct platform_device sm501_device = { static struct platform_device *rts7751r2d_devices[] __initdata = { &uart_device, &heartbeat_device, - &cf_ide_device, &sm501_device, }; static int __init rts7751r2d_devices_setup(void) { + int ret; + + if (ctrl_inw(PA_BVERREG) == 0x10) { /* only working on R2D-PLUS */ + ret = platform_device_register(&cf_ide_device); + if (ret) + return ret; + } + return platform_add_devices(rts7751r2d_devices, ARRAY_SIZE(rts7751r2d_devices)); } diff --git a/include/asm-sh/rts7751r2d.h b/include/asm-sh/rts7751r2d.h index 10565ac7966a..5d7800aa31b5 100644 --- a/include/asm-sh/rts7751r2d.h +++ b/include/asm-sh/rts7751r2d.h @@ -37,7 +37,7 @@ #define PA_VERREG 0xa4000032 /* FPGA Version Register */ #define PA_INPORT 0xa4000034 /* KEY Input Port control */ #define PA_OUTPORT 0xa4000036 /* LED 
control */ -#define PA_DMPORT 0xa4000038 /* DM270 Output Port control */ +#define PA_BVERREG 0xa4000038 /* Board Revision Register */ #define PA_AX88796L 0xaa000400 /* AX88796L Area */ #define PA_VOYAGER 0xab000000 /* VOYAGER GX Area */ -- cgit v1.2.3 From 3516ffb0fef710749daf288c0fe146503e0cf9d4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Aug 2007 19:23:56 -0700 Subject: [TCP]: Invoke tcp_sendmsg() directly, do not use inet_sendmsg(). As discovered by Evegniy Polyakov, if we try to sendmsg after a connection reset, we can do incredibly stupid things. The core issue is that inet_sendmsg() tries to autobind the socket, but we should never do that for TCP. Instead we should just go straight into TCP's sendmsg() code which will do all of the necessary state and pending socket error checks. TCP's sendpage already directly vectors to tcp_sendpage(), so this merely brings sendmsg() in line with that. Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_ipv4.c | 1 - net/ipv6/af_inet6.c | 2 +- net/ipv6/tcp_ipv6.c | 1 - 6 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index c209361ab74a..185c7ecce4cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -281,7 +281,7 @@ extern int tcp_v4_remember_stamp(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); -extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, +extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size); extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 06c08e5740fb..e68103475cca 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -831,7 +831,7 @@ const struct proto_ops inet_stream_ops = { .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, 
.getsockopt = sock_common_getsockopt, - .sendmsg = inet_sendmsg, + .sendmsg = tcp_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = tcp_sendpage, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index da4c0b6ab79a..7e740112b238 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -658,9 +658,10 @@ static inline int select_size(struct sock *sk) return tmp; } -int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { + struct sock *sk = sock->sk; struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3f5f7423b95c..9c94627c8c7e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2425,7 +2425,6 @@ struct proto tcp_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, - .sendmsg = tcp_sendmsg, .recvmsg = tcp_recvmsg, .backlog_rcv = tcp_v4_do_rcv, .hash = tcp_v4_hash, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eed09373a45d..b5f96372ad73 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -484,7 +484,7 @@ const struct proto_ops inet6_stream_ops = { .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ - .sendmsg = inet_sendmsg, /* ok */ + .sendmsg = tcp_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = tcp_sendpage, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f10f3689d671..cbdb78487915 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2115,7 +2115,6 @@ struct proto tcpv6_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, - .sendmsg = tcp_sendmsg, .recvmsg = tcp_recvmsg, .backlog_rcv = tcp_v6_do_rcv, .hash = tcp_v6_hash, -- cgit v1.2.3 From c0f7c6cb5dbb6d90e0334e62376dbc6ac3d1d315 Mon Sep 17 00:00:00 
2001 From: Paul Mackerras Date: Fri, 3 Aug 2007 14:08:24 +1000 Subject: [POWERPC] Expand RPN field to 34 bits when using 64k pages The real page number field in our PTEs when configured for 64kB pages is currently 32 bits, which turns out to be not quite enough for the resources that the eHCA driver wants to map. This expands the RPN field to include 2 adjacent, previously-unused bits. Signed-off-by: Paul Mackerras Acked-by: Benjamin Herrenschmidt --- include/asm-powerpc/pgtable-64k.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/pgtable-64k.h b/include/asm-powerpc/pgtable-64k.h index 31cbd3d7fce8..33ae9018fe72 100644 --- a/include/asm-powerpc/pgtable-64k.h +++ b/include/asm-powerpc/pgtable-64k.h @@ -49,12 +49,10 @@ /* Shift to put page number into pte. * - * That gives us a max RPN of 32 bits, which means a max of 48 bits - * of addressable physical space. - * We could get 3 more bits here by setting PTE_RPN_SHIFT to 29 but - * 32 makes PTEs more readable for debugging for now :) + * That gives us a max RPN of 34 bits, which means a max of 50 bits + * of addressable physical space, or 46 bits for the special 4k PFNs. */ -#define PTE_RPN_SHIFT (32) +#define PTE_RPN_SHIFT (30) #define PTE_RPN_MAX (1UL << (64 - PTE_RPN_SHIFT)) #define PTE_RPN_MASK (~((1UL< Date: Fri, 3 Aug 2007 11:55:39 +1000 Subject: [POWERPC] Fixes for the SLB shadow buffer code On a machine with hardware 64kB pages and a kernel configured for a 64kB base page size, we need to change the vmalloc segment from 64kB pages to 4kB pages if some driver creates a non-cacheable mapping in the vmalloc area. However, we never updated with SLB shadow buffer. This fixes it. Thanks to paulus for finding this. Also added some write barriers to ensure the shadow buffer contents are always consistent. 
Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/entry_64.S | 3 +++ arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/slb.c | 28 ++++++++++++++++++---------- include/asm-powerpc/mmu-hash64.h | 1 + 4 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9ef28da2c7fe..952eba6701f4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -389,8 +389,11 @@ BEGIN_FTR_SECTION ld r9,PACA_SLBSHADOWPTR(r13) li r12,0 std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */ + eieio std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */ + eieio std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */ + eieio slbie r6 slbie r6 /* Workaround POWER5 < DD2.1 issue */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index bc7b0cedae5e..f1789578747a 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -759,7 +759,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) mmu_psize_defs[mmu_vmalloc_psize].sllp) { get_paca()->vmalloc_sllp = mmu_psize_defs[mmu_vmalloc_psize].sllp; - slb_flush_and_rebolt(); + slb_vmalloc_update(); } #endif /* CONFIG_PPC_64K_PAGES */ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 304375a73574..b0697017d0e8 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -53,7 +53,8 @@ static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; } -static inline void slb_shadow_update(unsigned long esid, unsigned long vsid, +static inline void slb_shadow_update(unsigned long ea, + unsigned long flags, unsigned long entry) { /* @@ -61,11 +62,11 @@ static inline void slb_shadow_update(unsigned long esid, unsigned long vsid, * updating it. 
*/ get_slb_shadow()->save_area[entry].esid = 0; - barrier(); - get_slb_shadow()->save_area[entry].vsid = vsid; - barrier(); - get_slb_shadow()->save_area[entry].esid = esid; - + smp_wmb(); + get_slb_shadow()->save_area[entry].vsid = mk_vsid_data(ea, flags); + smp_wmb(); + get_slb_shadow()->save_area[entry].esid = mk_esid_data(ea, entry); + smp_wmb(); } static inline void create_shadowed_slbe(unsigned long ea, unsigned long flags, @@ -76,8 +77,7 @@ static inline void create_shadowed_slbe(unsigned long ea, unsigned long flags, * we don't get a stale entry here if we get preempted by PHYP * between these two statements. */ - slb_shadow_update(mk_esid_data(ea, entry), mk_vsid_data(ea, flags), - entry); + slb_shadow_update(ea, flags, entry); asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, flags)), @@ -104,8 +104,7 @@ void slb_flush_and_rebolt(void) ksp_esid_data &= ~SLB_ESID_V; /* Only third entry (stack) may change here so only resave that */ - slb_shadow_update(ksp_esid_data, - mk_vsid_data(ksp_esid_data, lflags), 2); + slb_shadow_update(get_paca()->kstack, lflags, 2); /* We need to do this all in asm, so we're sure we don't touch * the stack between the slbia and rebolting it. */ @@ -123,6 +122,15 @@ void slb_flush_and_rebolt(void) : "memory"); } +void slb_vmalloc_update(void) +{ + unsigned long vflags; + + vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_shadow_update(VMALLOC_START, vflags, 1); + slb_flush_and_rebolt(); +} + /* Flush all user entries from the segment table of the current processor. 
*/ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h index 695962f02059..3112ad14ad95 100644 --- a/include/asm-powerpc/mmu-hash64.h +++ b/include/asm-powerpc/mmu-hash64.h @@ -262,6 +262,7 @@ extern void slb_initialize(void); extern void slb_flush_and_rebolt(void); extern void stab_initialize(unsigned long stab); +extern void slb_vmalloc_update(void); #endif /* __ASSEMBLY__ */ /* -- cgit v1.2.3 From e8b398deb73b4a0c3d636f2146c9f35f8eac6d88 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 31 Jul 2007 16:37:11 +0300 Subject: IB: Include <linux/list.h> from <rdma/ib_mad.h> ib_mad.h uses struct list_head, so while linux/list.h seems to be pulled in indirectly by one of the headers it includes, the right thing is to include linux/list.h directly. Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- include/rdma/ib_mad.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 30712ddd8a5e..8ec3799e42e1 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -39,6 +39,8 @@ #if !defined( IB_MAD_H ) #define IB_MAD_H +#include <linux/list.h> + #include /* Management base version */ -- cgit v1.2.3 From bfb3ea125174813cdf87b1120caf0c9bd580283e Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 31 Jul 2007 16:49:15 +0300 Subject: IB: Include <linux/list.h> and <linux/rwsem.h> from <rdma/ib_verbs.h> ib_verbs.h uses struct list_head and rw_semaphore, so while the files linux/list.h and linux/rwsem.h seem to be pulled in indirectly by the other header files it includes, the right thing is to include those files directly.
Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0627a6aa282a..7a99f1125d24 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -46,6 +46,8 @@ #include #include #include +#include +#include #include #include -- cgit v1.2.3 From 92ddc447ce7382e36b72a240697c00bf4beb8d75 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Wed, 1 Aug 2007 13:33:56 +0300 Subject: IB: Move the macro IB_UMEM_MAX_PAGE_CHUNK() to umem.c After moving the definition of struct ib_umem_chunk from ib_verbs.h to ib_umem.h there isn't any reason for the macro IB_UMEM_MAX_PAGE_CHUNK to stay in ib_verbs.h. Move the macro to umem.c, the only place where it is used. Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/umem.c | 5 +++++ include/rdma/ib_verbs.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 26d0470eef6e..664d2faa9e74 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -40,6 +40,11 @@ #include "uverbs.h" +#define IB_UMEM_MAX_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ + ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ + (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) + static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { struct ib_umem_chunk *chunk, *tmp; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7a99f1125d24..4bea182d7116 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -733,11 +733,6 @@ struct ib_udata { size_t outlen; }; -#define IB_UMEM_MAX_PAGE_CHUNK \ - ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ - ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ - (void *) &((struct 
ib_umem_chunk *) 0)->page_list[0])) - struct ib_pd { struct ib_device *device; struct ib_uobject *uobject; -- cgit v1.2.3 From 63213196fd4b05b9c3539cbe34775c60f1f6fad0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jul 2007 20:43:17 -0700 Subject: [SPARC64]: Add missing dma_sync_single_range_for_*(). Reported by Andrew Morton. Signed-off-by: David S. Miller --- include/asm-sparc64/dma-mapping.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/asm-sparc64/dma-mapping.h b/include/asm-sparc64/dma-mapping.h index a72a5f271f31..1fc655452b81 100644 --- a/include/asm-sparc64/dma-mapping.h +++ b/include/asm-sparc64/dma-mapping.h @@ -108,6 +108,25 @@ static inline void dma_sync_single_for_device(struct device *dev, dma_ops->sync_single_for_device(dev, dma_handle, size, direction); } +static inline void dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction); +} + +static inline void dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_device(dev, dma_handle+offset, size, direction); +} + + static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) -- cgit v1.2.3 From 0a808a3131b2a6656475d82219f5e5d25edd7160 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Aug 2007 00:19:14 -0700 Subject: [SPARC32]: Fix modular build of floppy driver. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/entry.S | 7 ++-- arch/sparc/kernel/irq.c | 84 +++++++++++++++++++++++++++++------------ arch/sparc/kernel/sparc_ksyms.c | 1 - include/asm-sparc/floppy.h | 20 +++++----- include/asm-sparc/irq.h | 7 +--- 5 files changed, 75 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index eac38388f5fd..88d2cefd01be 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -1,7 +1,6 @@ -/* $Id: entry.S,v 1.170 2001/11/13 00:57:05 davem Exp $ - * arch/sparc/kernel/entry.S: Sparc trap low-level entry points. +/* arch/sparc/kernel/entry.S: Sparc trap low-level entry points. * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net) * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx) * Copyright (C) 1996-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -129,7 +128,7 @@ trap_low: RESTORE_ALL #endif -#ifdef CONFIG_BLK_DEV_FD +#if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE) .text .align 4 .globl floppy_hardint diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c index 75b2240ad0f9..b76dc03fc318 100644 --- a/arch/sparc/kernel/irq.c +++ b/arch/sparc/kernel/irq.c @@ -351,34 +351,14 @@ void handler_irq(int irq, struct pt_regs * regs) set_irq_regs(old_regs); } -#ifdef CONFIG_BLK_DEV_FD -extern void floppy_interrupt(int irq, void *dev_id); - -void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) -{ - struct pt_regs *old_regs; - int cpu = smp_processor_id(); - - old_regs = set_irq_regs(regs); - disable_pil_irq(irq); - irq_enter(); - kstat_cpu(cpu).irqs[irq]++; - floppy_interrupt(irq, dev_id); - irq_exit(); - enable_pil_irq(irq); - set_irq_regs(old_regs); - // XXX Eek, it's totally changed with preempt_count() and such - // if (softirq_pending(cpu)) - // do_softirq(); -} -#endif +#if 
defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE) /* Fast IRQs on the Sparc can only have one routine attached to them, * thus no sharing possible. */ -int request_fast_irq(unsigned int irq, - irq_handler_t handler, - unsigned long irqflags, const char *devname) +static int request_fast_irq(unsigned int irq, + void (*handler)(void), + unsigned long irqflags, const char *devname) { struct irqaction *action; unsigned long flags; @@ -457,7 +437,6 @@ int request_fast_irq(unsigned int irq, */ flush_cache_all(); - action->handler = handler; action->flags = irqflags; cpus_clear(action->mask); action->name = devname; @@ -475,6 +454,61 @@ out: return ret; } +/* These variables are used to access state from the assembler + * interrupt handler, floppy_hardint, so we cannot put these in + * the floppy driver image because that would not work in the + * modular case. + */ +volatile unsigned char *fdc_status; +EXPORT_SYMBOL(fdc_status); + +char *pdma_vaddr; +EXPORT_SYMBOL(pdma_vaddr); + +unsigned long pdma_size; +EXPORT_SYMBOL(pdma_size); + +volatile int doing_pdma; +EXPORT_SYMBOL(doing_pdma); + +char *pdma_base; +EXPORT_SYMBOL(pdma_base); + +unsigned long pdma_areasize; +EXPORT_SYMBOL(pdma_areasize); + +extern void floppy_hardint(void); + +static irqreturn_t (*floppy_irq_handler)(int irq, void *dev_id); + +void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct pt_regs *old_regs; + int cpu = smp_processor_id(); + + old_regs = set_irq_regs(regs); + disable_pil_irq(irq); + irq_enter(); + kstat_cpu(cpu).irqs[irq]++; + floppy_irq_handler(irq, dev_id); + irq_exit(); + enable_pil_irq(irq); + set_irq_regs(old_regs); + // XXX Eek, it's totally changed with preempt_count() and such + // if (softirq_pending(cpu)) + // do_softirq(); +} + +int sparc_floppy_request_irq(int irq, unsigned long flags, + irqreturn_t (*irq_handler)(int irq, void *)) +{ + floppy_irq_handler = irq_handler; + return request_fast_irq(irq, floppy_hardint, flags, "floppy"); +} 
+EXPORT_SYMBOL(sparc_floppy_request_irq); + +#endif + int request_irq(unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char * devname, void *dev_id) diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c index 7b4abde43028..ef647acc479e 100644 --- a/arch/sparc/kernel/sparc_ksyms.c +++ b/arch/sparc/kernel/sparc_ksyms.c @@ -143,7 +143,6 @@ EXPORT_SYMBOL(mstk48t02_regs); EXPORT_SYMBOL(set_auxio); EXPORT_SYMBOL(get_auxio); #endif -EXPORT_SYMBOL(request_fast_irq); EXPORT_SYMBOL(io_remap_pfn_range); /* P3: iounit_xxx may be needed, sun4d users */ /* EXPORT_SYMBOL(iounit_map_dma_init); */ diff --git a/include/asm-sparc/floppy.h b/include/asm-sparc/floppy.h index 28ce2b9c3da8..acd06d8ff70a 100644 --- a/include/asm-sparc/floppy.h +++ b/include/asm-sparc/floppy.h @@ -48,7 +48,7 @@ struct sun_flpy_controller { /* You'll only ever find one controller on a SparcStation anyways. */ static struct sun_flpy_controller *sun_fdc = NULL; -volatile unsigned char *fdc_status; +extern volatile unsigned char *fdc_status; struct sun_floppy_ops { unsigned char (*fd_inb)(int port); @@ -225,13 +225,13 @@ static void sun_82077_fd_outb(unsigned char value, int port) * underruns. If non-zero, doing_pdma encodes the direction of * the transfer for debugging. 1=read 2=write */ -char *pdma_vaddr; -unsigned long pdma_size; -volatile int doing_pdma = 0; +extern char *pdma_vaddr; +extern unsigned long pdma_size; +extern volatile int doing_pdma; /* This is software state */ -char *pdma_base = NULL; -unsigned long pdma_areasize; +extern char *pdma_base; +extern unsigned long pdma_areasize; /* Common routines to all controller types on the Sparc. 
*/ static __inline__ void virtual_dma_init(void) @@ -281,7 +281,8 @@ static __inline__ void sun_fd_enable_dma(void) } /* Our low-level entry point in arch/sparc/kernel/entry.S */ -irqreturn_t floppy_hardint(int irq, void *unused); +extern int sparc_floppy_request_irq(int irq, unsigned long flags, + irqreturn_t (*irq_handler)(int irq, void *)); static int sun_fd_request_irq(void) { @@ -290,8 +291,9 @@ static int sun_fd_request_irq(void) if(!once) { once = 1; - error = request_fast_irq(FLOPPY_IRQ, floppy_hardint, - IRQF_DISABLED, "floppy"); + error = sparc_floppy_request_irq(FLOPPY_IRQ, + IRQF_DISABLED, + floppy_interrupt); return ((error == 0) ? 0 : -1); } else return 0; } diff --git a/include/asm-sparc/irq.h b/include/asm-sparc/irq.h index 61fb99643afd..fe205cc444b8 100644 --- a/include/asm-sparc/irq.h +++ b/include/asm-sparc/irq.h @@ -1,7 +1,6 @@ -/* $Id: irq.h,v 1.32 2000/08/26 02:42:28 anton Exp $ - * irq.h: IRQ registers on the Sparc. +/* irq.h: IRQ registers on the Sparc. * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net) */ #ifndef _SPARC_IRQ_H @@ -13,6 +12,4 @@ #define irq_canonicalize(irq) (irq) -extern int request_fast_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, __const__ char *devname); - #endif -- cgit v1.2.3 From 6ba60d2195cd65d72eaf7ce3903a707c5bf20c7b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Aug 2007 14:24:17 -0700 Subject: [SPARC]: Fix O_CLOEXEC values. The one chosen by asm-generic/fcntl.h is not appropriate for this platform. Noticed by Ulrich Drepper. Signed-off-by: David S.
Miller --- include/asm-sparc/fcntl.h | 2 +- include/asm-sparc64/fcntl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-sparc/fcntl.h b/include/asm-sparc/fcntl.h index 5db60b5ae7b0..7bbdfc77accd 100644 --- a/include/asm-sparc/fcntl.h +++ b/include/asm-sparc/fcntl.h @@ -16,6 +16,7 @@ #define O_LARGEFILE 0x40000 #define O_DIRECT 0x100000 /* direct disk access hint */ #define O_NOATIME 0x200000 +#define O_CLOEXEC 0x400000 #define F_GETOWN 5 /* for sockets. */ #define F_SETOWN 6 /* for sockets. */ @@ -31,6 +32,5 @@ #define __ARCH_FLOCK_PAD short __unused; #define __ARCH_FLOCK64_PAD short __unused; -#include <asm-generic/fcntl.h> #endif diff --git a/include/asm-sparc64/fcntl.h b/include/asm-sparc64/fcntl.h index b2aecf0054bd..111f6b3b8925 100644 --- a/include/asm-sparc64/fcntl.h +++ b/include/asm-sparc64/fcntl.h @@ -16,7 +16,7 @@ #define O_LARGEFILE 0x40000 #define O_DIRECT 0x100000 /* direct disk access hint */ #define O_NOATIME 0x200000 - +#define O_CLOEXEC 0x400000 #define F_GETOWN 5 /* for sockets. */ #define F_SETOWN 6 /* for sockets. */ -- cgit v1.2.3 From 03a5743a12b58e10eaa936a02498539db645ba4e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 3 Aug 2007 16:41:11 -0500 Subject: [SCSI] sd: disentangle barriers in SCSI Our current implementation has a generic set of barrier functions that go through the SCSI driver model. Realistically, this is unnecessary, because the only device that can use barriers (sd) can set the flush functions up at probe or revalidate time. This patch pulls the barrier functions out of the mid layer and scsi driver model and relocates them directly in sd.
Acked-by: Tejun Heo Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 17 ----------------- drivers/scsi/sd.c | 14 +++++++++++--- include/scsi/scsi_driver.h | 2 -- include/scsi/sd.h | 2 -- 4 files changed, 11 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 21c075d44db1..a417a6ff9f97 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1038,22 +1038,6 @@ static int scsi_init_io(struct scsi_cmnd *cmd) return BLKPREP_KILL; } -static int scsi_issue_flush_fn(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - struct scsi_device *sdev = q->queuedata; - struct scsi_driver *drv; - - if (sdev->sdev_state != SDEV_RUNNING) - return -ENXIO; - - drv = *(struct scsi_driver **) disk->private_data; - if (drv->issue_flush) - return drv->issue_flush(&sdev->sdev_gendev, error_sector); - - return -EOPNOTSUPP; -} - static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev, struct request *req) { @@ -1596,7 +1580,6 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) return NULL; blk_queue_prep_rq(q, scsi_prep_fn); - blk_queue_issue_flush_fn(q, scsi_issue_flush_fn); blk_queue_softirq_done(q, scsi_softirq_done); return q; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e21c7142a3ea..2c6116fd4578 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -241,7 +241,6 @@ static struct scsi_driver sd_template = { }, .rescan = sd_rescan, .init_command = sd_init_command, - .issue_flush = sd_issue_flush, }; /* @@ -800,10 +799,17 @@ static int sd_sync_cache(struct scsi_disk *sdkp) return 0; } -static int sd_issue_flush(struct device *dev, sector_t *error_sector) +static int sd_issue_flush(struct request_queue *q, struct gendisk *disk, + sector_t *error_sector) { int ret = 0; - struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); + struct scsi_device *sdp = q->queuedata; + struct scsi_disk *sdkp; + + if (sdp->sdev_state != 
SDEV_RUNNING) + return -ENXIO; + + sdkp = scsi_disk_get_from_dev(&sdp->sdev_gendev); if (!sdkp) return -ENODEV; @@ -1663,6 +1669,8 @@ static int sd_probe(struct device *dev) sd_revalidate_disk(gd); + blk_queue_issue_flush_fn(sdp->request_queue, sd_issue_flush); + gd->driverfs_dev = &sdp->sdev_gendev; gd->flags = GENHD_FL_DRIVERFS; if (sdp->removable) diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 02e26c1672bf..3465f31a21c4 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -13,8 +13,6 @@ struct scsi_driver { int (*init_command)(struct scsi_cmnd *); void (*rescan)(struct device *); - int (*issue_flush)(struct device *, sector_t *); - int (*prepare_flush)(struct request_queue *, struct request *); }; #define to_scsi_driver(drv) \ container_of((drv), struct scsi_driver, gendrv) diff --git a/include/scsi/sd.h b/include/scsi/sd.h index 78583fee0ab2..ce02ad1f5185 100644 --- a/include/scsi/sd.h +++ b/include/scsi/sd.h @@ -56,8 +56,6 @@ static int sd_suspend(struct device *dev, pm_message_t state); static int sd_resume(struct device *dev); static void sd_rescan(struct device *); static int sd_init_command(struct scsi_cmnd *); -static int sd_issue_flush(struct device *, sector_t *); -static void sd_prepare_flush(struct request_queue *, struct request *); static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); static void scsi_disk_release(struct class_device *cdev); static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *); -- cgit v1.2.3 From 0a8626a475b2b6215a25e944fbd378bd5818fdd7 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 30 Jul 2007 02:07:39 +0100 Subject: [ARM] 4531/1: remove is_in_rom() prototype Remove unused is_in_rom() function prototype.
Signed-off-by: Greg Ungerer Signed-off-by: Russell King --- include/asm-arm/pgtable-nommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/pgtable-nommu.h b/include/asm-arm/pgtable-nommu.h index b186bc820e30..33c83dd87965 100644 --- a/include/asm-arm/pgtable-nommu.h +++ b/include/asm-arm/pgtable-nommu.h @@ -74,7 +74,6 @@ static inline int pte_file(pte_t pte) { return 0; } * These would be in other places but having them here reduces the diffs. */ extern unsigned int kobjsize(const void *objp); -extern int is_in_rom(unsigned long); /* * No page table caches to initialise. -- cgit v1.2.3 From c8154c8a9aaf548ad91e413da8b319858e37bc52 Mon Sep 17 00:00:00 2001 From: Mark Fortescue Date: Sat, 4 Aug 2007 21:17:18 -0700 Subject: [SPARC32]: Fix build. Correct incorrect removal of asm-generic/fcntl.h from asm-sparc/fcntl.h by commit 6ba60d2195cd65d72eaf7ce3903a707c5bf20c7b. Signed-off-by: Mark Fortescue Signed-off-by: David S. Miller --- include/asm-sparc/fcntl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-sparc/fcntl.h b/include/asm-sparc/fcntl.h index 7bbdfc77accd..5ec546349fc8 100644 --- a/include/asm-sparc/fcntl.h +++ b/include/asm-sparc/fcntl.h @@ -32,5 +32,6 @@ #define __ARCH_FLOCK_PAD short __unused; #define __ARCH_FLOCK64_PAD short __unused; +#include <asm-generic/fcntl.h> #endif -- cgit v1.2.3 From be1b685fe6c9928848b26b568eaa86ba8ce0046c Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Sat, 4 Aug 2007 21:18:16 -0700 Subject: [NETFILTER]: Add xt_statistic.h to the header list for usermode programs Add xt_statistic.h to the list of headers to install. Apparently needed to build newer versions of iptables. Signed-off-by: Chuck Ebbert Signed-off-by: David S.
Miller --- include/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 43397a414cd6..ab57cb7d7c61 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -28,6 +28,7 @@ header-y += xt_policy.h header-y += xt_realm.h header-y += xt_sctp.h header-y += xt_state.h +header-y += xt_statistic.h header-y += xt_string.h header-y += xt_tcpmss.h header-y += xt_tcpudp.h -- cgit v1.2.3 From 5e11934d13c9a3bcb0cadad6c7a7de5c32660422 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Jul 2007 12:06:17 -0400 Subject: NFS: Fix put_nfs_open_context We need to grab the inode->i_lock atomically with the last reference put in order to remove the open context that is being freed from the nfsi->open_files list. Fix by converting the kref to a standard atomic counter and then using atomic_dec_and_lock()... Thanks to Arnd Bergmann for pointing out the problem. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 24 ++++++++---------------- include/linux/nfs_fs.h | 2 +- 2 files changed, 9 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bca6cdcb9f0d..71a49c3acabd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -468,7 +468,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx->lockowner = current->files; ctx->error = 0; ctx->dir_cookie = 0; - kref_init(&ctx->kref); + atomic_set(&ctx->count, 1); } return ctx; } @@ -476,21 +476,18 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - kref_get(&ctx->kref); + atomic_inc(&ctx->count); return ctx; } -static void nfs_free_open_context(struct kref *kref) +void put_nfs_open_context(struct nfs_open_context *ctx) { - struct nfs_open_context *ctx = container_of(kref, - struct 
nfs_open_context, kref); + struct inode *inode = ctx->path.dentry->d_inode; - if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->path.dentry->d_inode; - spin_lock(&inode->i_lock); - list_del(&ctx->list); - spin_unlock(&inode->i_lock); - } + if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) + return; + list_del(&ctx->list); + spin_unlock(&inode->i_lock); if (ctx->state != NULL) nfs4_close_state(&ctx->path, ctx->state, ctx->mode); if (ctx->cred != NULL) @@ -500,11 +497,6 @@ static void nfs_free_open_context(struct kref *kref) kfree(ctx); } -void put_nfs_open_context(struct nfs_open_context *ctx) -{ - kref_put(&ctx->kref, nfs_free_open_context); -} - /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9ba4aec37c50..157dcb055b5c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -71,7 +71,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { - struct kref kref; + atomic_t count; struct path path; struct rpc_cred *cred; struct nfs4_state *state; -- cgit v1.2.3 From 501092929ccb8a1d2eb0ed700e38df4ae0de7108 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 7 Aug 2007 18:40:30 -0400 Subject: acpi-cpufreq: Fix some x86/x86-64 acpi-cpufreq driver issues This patch addresses some issues in x86/x86-64 acpi-cpufreq driver: 1. Current memory allocation for acpi_perf_data is actually open-coded alloc_percpu(). The patch defines and handles acpi_perf_data as percpu data. The code will be cleaner and easier to be maintained with this change. 2. Won't load driver in acpi_cpufreq_early_init() failure case. 3. Add __init for acpi_cpufreq_early_init(). 
Signed-off-by: Fenghua Yu Acked-by: Venkatesh Pallipadi Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 41 +++++++++++------------------ drivers/acpi/processor_perflib.c | 6 ++--- include/acpi/processor.h | 2 +- 3 files changed, 19 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 32d04b083e38..705e13a30781 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -68,7 +68,8 @@ struct acpi_cpufreq_data { }; static struct acpi_cpufreq_data *drv_data[NR_CPUS]; -static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; +/* acpi_perf_data is a pointer to percpu data. */ +static struct acpi_processor_performance *acpi_perf_data; static struct cpufreq_driver acpi_cpufreq_driver; @@ -508,24 +509,14 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) * do _PDC and _PSD and find out the processor dependency for the * actual init that will happen later... 
*/ -static int acpi_cpufreq_early_init(void) +static int __init acpi_cpufreq_early_init(void) { - struct acpi_processor_performance *data; - unsigned int i, j; - dprintk("acpi_cpufreq_early_init\n"); - for_each_possible_cpu(i) { - data = kzalloc(sizeof(struct acpi_processor_performance), - GFP_KERNEL); - if (!data) { - for_each_possible_cpu(j) { - kfree(acpi_perf_data[j]); - acpi_perf_data[j] = NULL; - } - return -ENOMEM; - } - acpi_perf_data[i] = data; + acpi_perf_data = alloc_percpu(struct acpi_processor_performance); + if (!acpi_perf_data) { + dprintk("Memory allocation error for acpi_perf_data.\n"); + return -ENOMEM; } /* Do initialization in ACPI core */ @@ -574,14 +565,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) dprintk("acpi_cpufreq_cpu_init\n"); - if (!acpi_perf_data[cpu]) - return -ENODEV; - data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); if (!data) return -ENOMEM; - data->acpi_data = acpi_perf_data[cpu]; + data->acpi_data = percpu_ptr(acpi_perf_data, cpu); drv_data[cpu] = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) @@ -778,24 +766,25 @@ static struct cpufreq_driver acpi_cpufreq_driver = { static int __init acpi_cpufreq_init(void) { + int ret; + dprintk("acpi_cpufreq_init\n"); - acpi_cpufreq_early_init(); + ret = acpi_cpufreq_early_init(); + if (ret) + return ret; return cpufreq_register_driver(&acpi_cpufreq_driver); } static void __exit acpi_cpufreq_exit(void) { - unsigned int i; dprintk("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); - for_each_possible_cpu(i) { - kfree(acpi_perf_data[i]); - acpi_perf_data[i] = NULL; - } + free_percpu(acpi_perf_data); + return; } diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index c4efc0c17f8f..463b0247cbc5 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -539,7 +539,7 @@ end: } int acpi_processor_preregister_performance( - struct acpi_processor_performance **performance) + struct 
acpi_processor_performance *performance) { int count, count_target; int retval = 0; @@ -567,12 +567,12 @@ int acpi_processor_preregister_performance( continue; } - if (!performance || !performance[i]) { + if (!performance || !percpu_ptr(performance, i)) { retval = -EINVAL; continue; } - pr->performance = performance[i]; + pr->performance = percpu_ptr(performance, i); cpu_set(i, pr->performance->shared_cpu_map); if (acpi_processor_get_psd(pr)) { retval = -EINVAL; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index f9f987f8e661..ec3ffdadb4d2 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -232,7 +232,7 @@ struct acpi_processor_errata { extern int acpi_processor_preregister_performance(struct acpi_processor_performance - **performance); + *performance); extern int acpi_processor_register_performance(struct acpi_processor_performance *performance, unsigned int cpu); -- cgit v1.2.3 From 591e620693e71e24fb3450a4084217e44b7a60b6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 7 Aug 2007 18:12:01 -0700 Subject: [NETFILTER]: nf_nat: add symbolic dependency on IPv4 conntrack Loading nf_nat causes the conntrack core to be loaded, but we need IPv4 as well. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 2 ++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 ++++++ net/ipv4/netfilter/nf_nat_standalone.c | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 7a671603fca6..9bf059817aec 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -21,4 +21,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; extern int nf_conntrack_ipv4_compat_init(void); extern void nf_conntrack_ipv4_compat_fini(void); +extern void need_ipv4_conntrack(void); + #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 64552afd01cb..d9b5177989c6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -509,3 +509,9 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) module_init(nf_conntrack_l3proto_ipv4_init); module_exit(nf_conntrack_l3proto_ipv4_fini); + +void need_ipv4_conntrack(void) +{ + return; +} +EXPORT_SYMBOL_GPL(need_ipv4_conntrack); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 332814dac503..46cc99def165 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -328,7 +328,7 @@ static int __init nf_nat_standalone_init(void) { int ret = 0; - need_conntrack(); + need_ipv4_conntrack(); #ifdef CONFIG_XFRM BUG_ON(ip_nat_decode_session != NULL); -- cgit v1.2.3 From 46bcea7751efc867ae4b073c99ad1d137a475bc0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 7 Aug 2007 18:46:36 -0700 Subject: [SPARC]: Centralize find_in_proplist() instead of duplicating N times. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/prom.c | 15 +++++++++++++++ arch/sparc64/kernel/mdesc.c | 24 +++++------------------- arch/sparc64/kernel/prom.c | 15 +++++++++++++++ arch/sparc64/kernel/vio.c | 19 ++----------------- include/asm-sparc/prom.h | 1 + include/asm-sparc64/prom.h | 1 + 6 files changed, 39 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/prom.c b/arch/sparc/kernel/prom.c index 39fbd3c8ab0b..cd4fb79aa3a8 100644 --- a/arch/sparc/kernel/prom.c +++ b/arch/sparc/kernel/prom.c @@ -102,6 +102,21 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len } EXPORT_SYMBOL(of_set_property); +int of_find_in_proplist(const char *list, const char *match, int len) +{ + while (len > 0) { + int l; + + if (!strcmp(list, match)) + return 1; + l = strlen(list) + 1; + list += l; + len -= l; + } + return 0; +} +EXPORT_SYMBOL(of_find_in_proplist); + static unsigned int prom_early_allocated; static void * __init prom_early_alloc(unsigned long size) diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c index cce4d0ddf5d5..95059c2ec414 100644 --- a/arch/sparc64/kernel/mdesc.c +++ b/arch/sparc64/kernel/mdesc.c @@ -568,20 +568,6 @@ static void __init report_platform_properties(void) mdesc_release(hp); } -static int inline find_in_proplist(const char *list, const char *match, int len) -{ - while (len > 0) { - int l; - - if (!strcmp(list, match)) - return 1; - l = strlen(list) + 1; - list += l; - len -= l; - } - return 0; -} - static void __devinit fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp) @@ -596,10 +582,10 @@ static void __devinit fill_in_one_cache(cpuinfo_sparc *c, switch (*level) { case 1: - if (find_in_proplist(type, "instn", type_len)) { + if (of_find_in_proplist(type, "instn", type_len)) { c->icache_size = *size; c->icache_line_size = *line_size; - } else if (find_in_proplist(type, "data", type_len)) { + } else if (of_find_in_proplist(type, "data", type_len)) { c->dcache_size = 
*size; c->dcache_line_size = *line_size; } @@ -677,7 +663,7 @@ static void __devinit set_core_ids(struct mdesc_handle *hp) continue; type = mdesc_get_property(hp, mp, "type", &len); - if (!find_in_proplist(type, "instn", len)) + if (!of_find_in_proplist(type, "instn", len)) continue; mark_core_ids(hp, mp, idx); @@ -718,8 +704,8 @@ static void __devinit __set_proc_ids(struct mdesc_handle *hp, int len; type = mdesc_get_property(hp, mp, "type", &len); - if (!find_in_proplist(type, "int", len) && - !find_in_proplist(type, "integer", len)) + if (!of_find_in_proplist(type, "int", len) && + !of_find_in_proplist(type, "integer", len)) continue; mark_proc_ids(hp, mp, idx); diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c index f4e0a9ad9be3..d1a78c976cef 100644 --- a/arch/sparc64/kernel/prom.c +++ b/arch/sparc64/kernel/prom.c @@ -107,6 +107,21 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len } EXPORT_SYMBOL(of_set_property); +int of_find_in_proplist(const char *list, const char *match, int len) +{ + while (len > 0) { + int l; + + if (!strcmp(list, match)) + return 1; + l = strlen(list) + 1; + list += l; + len -= l; + } + return 0; +} +EXPORT_SYMBOL(of_find_in_proplist); + static unsigned int prom_early_allocated; static void * __init prom_early_alloc(unsigned long size) diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c index 3685daf5157f..1550ac5673da 100644 --- a/arch/sparc64/kernel/vio.c +++ b/arch/sparc64/kernel/vio.c @@ -16,21 +16,6 @@ #include #include -static inline int find_in_proplist(const char *list, const char *match, - int len) -{ - while (len > 0) { - int l; - - if (!strcmp(list, match)) - return 1; - l = strlen(list) + 1; - list += l; - len -= l; - } - return 0; -} - static const struct vio_device_id *vio_match_device( const struct vio_device_id *matches, const struct vio_dev *dev) @@ -49,7 +34,7 @@ static const struct vio_device_id *vio_match_device( if (matches->compat[0]) { match &= len 
&& - find_in_proplist(compat, matches->compat, len); + of_find_in_proplist(compat, matches->compat, len); } if (match) return matches; @@ -406,7 +391,7 @@ static int __init vio_init(void) "property\n"); goto out_release; } - if (!find_in_proplist(compat, channel_devices_compat, len)) { + if (!of_find_in_proplist(compat, channel_devices_compat, len)) { printk(KERN_ERR "VIO: Channel devices node lacks (%s) " "compat entry.\n", channel_devices_compat); goto out_release; diff --git a/include/asm-sparc/prom.h b/include/asm-sparc/prom.h index 350676c589f9..71f2a1998324 100644 --- a/include/asm-sparc/prom.h +++ b/include/asm-sparc/prom.h @@ -67,6 +67,7 @@ extern int of_set_property(struct device_node *node, const char *name, void *val extern int of_getintprop_default(struct device_node *np, const char *name, int def); +extern int of_find_in_proplist(const char *list, const char *match, int len); extern void prom_build_devicetree(void); diff --git a/include/asm-sparc64/prom.h b/include/asm-sparc64/prom.h index 31dcb92fbae0..07843f9f05df 100644 --- a/include/asm-sparc64/prom.h +++ b/include/asm-sparc64/prom.h @@ -76,6 +76,7 @@ extern int of_set_property(struct device_node *node, const char *name, void *val extern int of_getintprop_default(struct device_node *np, const char *name, int def); +extern int of_find_in_proplist(const char *list, const char *match, int len); extern void prom_build_devicetree(void); -- cgit v1.2.3 From 6c70b6fc7b6fc321636a014082d9e32333da1f80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 8 Aug 2007 17:11:39 -0700 Subject: [SPARC64]: Do not assume sun4v chips have load-twin/store-init support. Check the cpu type in the OBP device tree before committing to using the optimized Niagara memcpy and memset implementation. If we don't recognize the cpu type, use a completely generic version. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/cpu.c | 42 ++++----- arch/sparc64/kernel/head.S | 111 ++++++++++++++++++++++-- arch/sparc64/kernel/sparc64_ksyms.c | 1 + arch/sparc64/lib/GENbzero.S | 160 +++++++++++++++++++++++++++++++++++ arch/sparc64/lib/GENcopy_from_user.S | 34 ++++++++ arch/sparc64/lib/GENcopy_to_user.S | 38 +++++++++ arch/sparc64/lib/GENmemcpy.S | 121 ++++++++++++++++++++++++++ arch/sparc64/lib/GENpage.S | 77 +++++++++++++++++ arch/sparc64/lib/GENpatch.S | 33 ++++++++ arch/sparc64/lib/Makefile | 4 +- include/asm-sparc64/oplib.h | 7 +- include/asm-sparc64/spitfire.h | 7 ++ include/asm-sparc64/xor.h | 6 +- 13 files changed, 604 insertions(+), 37 deletions(-) create mode 100644 arch/sparc64/lib/GENbzero.S create mode 100644 arch/sparc64/lib/GENcopy_from_user.S create mode 100644 arch/sparc64/lib/GENcopy_to_user.S create mode 100644 arch/sparc64/lib/GENmemcpy.S create mode 100644 arch/sparc64/lib/GENpage.S create mode 100644 arch/sparc64/lib/GENpatch.S (limited to 'include') diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c index e4eff878123d..e43db73f2b91 100644 --- a/arch/sparc64/kernel/cpu.c +++ b/arch/sparc64/kernel/cpu.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; @@ -69,36 +69,24 @@ unsigned int fsr_storage; static void __init sun4v_cpu_probe(void) { - struct device_node *dp; - const char *compat; - int len; - - dp = of_find_node_by_name(NULL, "cpu"); - if (!dp) - goto no_compat; - - compat = of_get_property(dp, "compatible", &len); - if (!compat) - goto no_compat; - - if (of_find_in_proplist(compat, "SUNW,UltraSPARC-T1", len)) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: sparc_cpu_type = "UltraSparc T1 (Niagara)"; sparc_fpu_type = "UltraSparc T1 integrated FPU"; - } else if (of_find_in_proplist(compat, "SUNW,UltraSPARC-T2", len)) { + break; + + case SUN4V_CHIP_NIAGARA2: sparc_cpu_type = "UltraSparc T2 (Niagara2)"; sparc_fpu_type = "UltraSparc T2 integrated 
FPU"; - } else - goto unknown; - - return; - -no_compat: - compat = "no property"; - -unknown: - printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", compat); - sparc_cpu_type = "Unknown SUN4V CPU"; - sparc_fpu_type = "Unknown SUN4V FPU"; + break; + + default: + printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", + prom_cpu_compatible); + sparc_cpu_type = "Unknown SUN4V CPU"; + sparc_fpu_type = "Unknown SUN4V FPU"; + break; + } } void __init cpu_probe(void) diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 9dbd833d79d6..ac18bd8e273f 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -97,7 +97,8 @@ sparc64_boot: .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache .globl prom_boot_mapped_pc, prom_boot_mapping_mode .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low - .globl is_sun4v + .globl prom_compatible_name, prom_cpu_path, prom_cpu_compatible + .globl is_sun4v, sun4v_chip_type prom_peer_name: .asciz "peer" prom_compatible_name: @@ -106,6 +107,8 @@ prom_finddev_name: .asciz "finddevice" prom_chosen_path: .asciz "/chosen" +prom_cpu_path: + .asciz "/cpu" prom_getprop_name: .asciz "getprop" prom_mmu_name: @@ -120,9 +123,13 @@ prom_unmap_name: .asciz "unmap" prom_sun4v_name: .asciz "sun4v" +prom_niagara_prefix: + .asciz "SUNW,UltraSPARC-T" .align 4 prom_root_compatible: .skip 64 +prom_cpu_compatible: + .skip 64 prom_root_node: .word 0 prom_mmu_ihandle_cache: @@ -138,6 +145,8 @@ prom_boot_mapping_phys_low: .xword 0 is_sun4v: .word 0 +sun4v_chip_type: + .word SUN4V_CHIP_INVALID 1: rd %pc, %l0 @@ -296,13 +305,13 @@ is_sun4v: sethi %hi(prom_sun4v_name), %g7 or %g7, %lo(prom_sun4v_name), %g7 mov 5, %g3 -1: ldub [%g7], %g2 +90: ldub [%g7], %g2 ldub [%g1], %g4 cmp %g2, %g4 - bne,pn %icc, 2f + bne,pn %icc, 80f add %g7, 1, %g7 subcc %g3, 1, %g3 - bne,pt %xcc, 1b + bne,pt %xcc, 90b add %g1, 1, %g1 sethi %hi(is_sun4v), %g1 @@ -310,7 +319,80 @@ is_sun4v: mov 1, %g7 stw %g7, [%g1] -2: + /* cpu_node 
= prom_finddevice("/cpu") */ + mov (1b - prom_finddev_name), %l1 + mov (1b - prom_cpu_path), %l2 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %sp, (192 + 128), %sp + + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice" + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/cpu" + stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x20], %l4 ! cpu device node + + mov (1b - prom_getprop_name), %l1 + mov (1b - prom_compatible_name), %l2 + mov (1b - prom_cpu_compatible), %l5 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l5, %l5 + + /* prom_getproperty(cpu_node, "compatible", + * &prom_cpu_compatible, 64) + */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop" + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, cpu_node + stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible" + stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_cpu_compatible + mov 64, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size + stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! 
argument array + + add %sp, (192 + 128), %sp + + sethi %hi(prom_cpu_compatible), %g1 + or %g1, %lo(prom_cpu_compatible), %g1 + sethi %hi(prom_niagara_prefix), %g7 + or %g7, %lo(prom_niagara_prefix), %g7 + mov 17, %g3 +90: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp %g2, %g4 + bne,pn %icc, 4f + add %g7, 1, %g7 + subcc %g3, 1, %g3 + bne,pt %xcc, 90b + add %g1, 1, %g1 + + sethi %hi(prom_cpu_compatible), %g1 + or %g1, %lo(prom_cpu_compatible), %g1 + ldub [%g1 + 17], %g2 + cmp %g2, '1' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA1, %g4 + cmp %g2, '2' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA2, %g4 +4: + mov SUN4V_CHIP_UNKNOWN, %g4 +5: sethi %hi(sun4v_chip_type), %g2 + or %g2, %lo(sun4v_chip_type), %g2 + stw %g4, [%g2] + +80: BRANCH_IF_SUN4V(g1, jump_to_sun4u_init) BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot) BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot) @@ -414,6 +496,24 @@ niagara_tlb_fixup: stw %g2, [%g1 + %lo(tlb_type)] /* Patch copy/clear ops. */ + sethi %hi(sun4v_chip_type), %g1 + lduw [%g1 + %lo(sun4v_chip_type)], %g1 + cmp %g1, SUN4V_CHIP_NIAGARA1 + be,pt %xcc, niagara_patch + cmp %g1, SUN4V_CHIP_NIAGARA2 + be,pt %xcc, niagara_patch + nop + + call generic_patch_copyops + nop + call generic_patch_bzero + nop + call generic_patch_pageops + nop + + ba,a,pt %xcc, 80f + +niagara_patch: call niagara_patch_copyops nop call niagara_patch_bzero @@ -421,6 +521,7 @@ niagara_tlb_fixup: call niagara_patch_pageops nop +80: /* Patch TLB/cache ops. 
*/ call hypervisor_patch_cachetlbops nop diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index d270c2f0be0f..23fad7ebdd0d 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -168,6 +168,7 @@ EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(__flushw_user); EXPORT_SYMBOL(tlb_type); +EXPORT_SYMBOL(sun4v_chip_type); EXPORT_SYMBOL(get_fb_unmapped_area); EXPORT_SYMBOL(flush_icache_range); diff --git a/arch/sparc64/lib/GENbzero.S b/arch/sparc64/lib/GENbzero.S new file mode 100644 index 000000000000..f9c71d64eba1 --- /dev/null +++ b/arch/sparc64/lib/GENbzero.S @@ -0,0 +1,160 @@ +/* GENbzero.S: Generic sparc64 memset/clear_user. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ +#include + +#define EX_ST(x,y) \ +98: x,y; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov %o1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + + .align 32 + .text + + .globl GENmemset + .type GENmemset, #function +GENmemset: /* %o0=buf, %o1=pat, %o2=len */ + and %o1, 0xff, %o3 + mov %o2, %o1 + sllx %o3, 8, %g1 + or %g1, %o3, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %xcc, 1f + or %g1, %o2, %o2 + + .globl GENbzero + .type GENbzero, #function +GENbzero: + clr %o2 +1: brz,pn %o1, GENbzero_return + mov %o0, %o3 + + /* %o5: saved %asi, restored at GENbzero_done + * %o4: store %asi to use + */ + rd %asi, %o5 + mov ASI_P, %o4 + wr %o4, 0x0, %asi + +GENbzero_from_clear_user: + cmp %o1, 15 + bl,pn %icc, GENbzero_tiny + andcc %o0, 0x7, %g1 + be,pt %xcc, 2f + mov 8, %g2 + sub %g2, %g1, %g1 + sub %o1, %g1, %o1 +1: EX_ST(stba %o2, [%o0 + 0x00] %asi) + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %o0, 1, %o0 +2: cmp %o1, 128 + bl,pn %icc, GENbzero_medium + andcc %o0, (64 - 1), %g1 + be,pt %xcc, GENbzero_pre_loop + mov 64, %g2 + sub %g2, %g1, %g1 + sub %o1, %g1, %o1 +1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) + subcc %g1, 8, %g1 + bne,pt %xcc, 1b + 
add %o0, 8, %o0 + +GENbzero_pre_loop: + andn %o1, (64 - 1), %g1 + sub %o1, %g1, %o1 +GENbzero_loop: + EX_ST(stxa %o2, [%o0 + 0x00] %asi) + EX_ST(stxa %o2, [%o0 + 0x08] %asi) + EX_ST(stxa %o2, [%o0 + 0x10] %asi) + EX_ST(stxa %o2, [%o0 + 0x18] %asi) + EX_ST(stxa %o2, [%o0 + 0x20] %asi) + EX_ST(stxa %o2, [%o0 + 0x28] %asi) + EX_ST(stxa %o2, [%o0 + 0x30] %asi) + EX_ST(stxa %o2, [%o0 + 0x38] %asi) + subcc %g1, 64, %g1 + bne,pt %xcc, GENbzero_loop + add %o0, 64, %o0 + + membar #Sync + wr %o4, 0x0, %asi + brz,pn %o1, GENbzero_done +GENbzero_medium: + andncc %o1, 0x7, %g1 + be,pn %xcc, 2f + sub %o1, %g1, %o1 +1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) + subcc %g1, 8, %g1 + bne,pt %xcc, 1b + add %o0, 8, %o0 +2: brz,pt %o1, GENbzero_done + nop + +GENbzero_tiny: +1: EX_ST(stba %o2, [%o0 + 0x00] %asi) + subcc %o1, 1, %o1 + bne,pt %icc, 1b + add %o0, 1, %o0 + + /* fallthrough */ + +GENbzero_done: + wr %o5, 0x0, %asi + +GENbzero_return: + retl + mov %o3, %o0 + .size GENbzero, .-GENbzero + .size GENmemset, .-GENmemset + + .globl GENclear_user + .type GENclear_user, #function +GENclear_user: /* %o0=buf, %o1=len */ + rd %asi, %o5 + brz,pn %o1, GENbzero_done + clr %o3 + cmp %o5, ASI_AIUS + bne,pn %icc, GENbzero + clr %o2 + ba,pt %xcc, GENbzero_from_clear_user + mov ASI_AIUS, %o4 + .size GENclear_user, .-GENclear_user + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define GEN_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl generic_patch_bzero + .type generic_patch_bzero,#function +generic_patch_bzero: + GEN_DO_PATCH(memset, GENmemset) + GEN_DO_PATCH(__bzero, GENbzero) + GEN_DO_PATCH(__clear_user, GENclear_user) + retl 
+ nop + .size generic_patch_bzero,.-generic_patch_bzero diff --git a/arch/sparc64/lib/GENcopy_from_user.S b/arch/sparc64/lib/GENcopy_from_user.S new file mode 100644 index 000000000000..2b9df99e87f9 --- /dev/null +++ b/arch/sparc64/lib/GENcopy_from_user.S @@ -0,0 +1,34 @@ +/* GENcopy_from_user.S: Generic sparc64 copy from userspace. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#define EX_LD(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME GENcopy_from_user +#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "GENmemcpy.S" diff --git a/arch/sparc64/lib/GENcopy_to_user.S b/arch/sparc64/lib/GENcopy_to_user.S new file mode 100644 index 000000000000..bb3f7084daf9 --- /dev/null +++ b/arch/sparc64/lib/GENcopy_to_user.S @@ -0,0 +1,38 @@ +/* GENcopy_to_user.S: Generic sparc64 copy to userspace. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#define EX_ST(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME GENcopy_to_user +#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. 
+ */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "GENmemcpy.S" diff --git a/arch/sparc64/lib/GENmemcpy.S b/arch/sparc64/lib/GENmemcpy.S new file mode 100644 index 000000000000..89358ee94851 --- /dev/null +++ b/arch/sparc64/lib/GENmemcpy.S @@ -0,0 +1,121 @@ +/* GENmemcpy.S: Generic sparc64 memcpy. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#ifdef __KERNEL__ +#define GLOBAL_SPARE %g7 +#else +#define GLOBAL_SPARE %g5 +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#define LOAD(type,addr,dest) type [addr], dest +#endif + +#ifndef STORE +#define STORE(type,src,addr) type src, [addr] +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME GENmemcpy +#endif + +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 64 + + .globl FUNC_NAME + .type FUNC_NAME,#function +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ + srlx %o2, 31, %g2 + cmp %g2, 0 + tne %XCC, 5 + PREAMBLE + mov %o0, GLOBAL_SPARE + + cmp %o2, 0 + be,pn %XCC, 85f + or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + xor %o0, %o1, %o4 + andcc %o4, 0x7, %g0 + bne,a,pn %XCC, 90f + sub %o0, %o1, %o3 + + and %o0, 0x7, %o4 + sub %o4, 0x8, %o4 + sub %g0, %o4, %o4 + sub %o2, %o4, %o2 +1: subcc %o4, 1, %o4 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o0)) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 + + andn %o2, 0x7, %g1 + sub %o2, %g1, %o2 +1: subcc %g1, 0x8, %g1 + EX_LD(LOAD(ldx, %o1, %g2)) + EX_ST(STORE(stx, %g2, %o0)) + add %o1, 0x8, %o1 + bne,pt %XCC, 1b + add %o0, 0x8, %o0 + + brz,pt %o2, 85f + sub %o0, %o1, %o3 + ba,a,pt %XCC, 90f + + .align 64 +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + 
EX_LD(LOAD(lduw, %o1, %g1)) + EX_ST(STORE(stw, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .align 32 +90: + subcc %o2, 1, %o2 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o1 + %o3)) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/GENpage.S b/arch/sparc64/lib/GENpage.S new file mode 100644 index 000000000000..2ef9d05f21bc --- /dev/null +++ b/arch/sparc64/lib/GENpage.S @@ -0,0 +1,77 @@ +/* GENpage.S: Generic clear and copy page. + * + * Copyright (C) 2007 (davem@davemloft.net) + */ +#include + + .text + .align 32 + +GENcopy_user_page: + set PAGE_SIZE, %g7 +1: ldx [%o1 + 0x00], %o2 + ldx [%o1 + 0x08], %o3 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + stx %o2, [%o0 + 0x00] + stx %o3, [%o0 + 0x08] + stx %o4, [%o0 + 0x10] + stx %o5, [%o0 + 0x18] + ldx [%o1 + 0x20], %o2 + ldx [%o1 + 0x28], %o3 + ldx [%o1 + 0x30], %o4 + ldx [%o1 + 0x38], %o5 + stx %o2, [%o0 + 0x20] + stx %o3, [%o0 + 0x28] + stx %o4, [%o0 + 0x30] + stx %o5, [%o0 + 0x38] + subcc %g7, 64, %g7 + add %o1, 64, %o1 + bne,pt %xcc, 1b + add %o0, 64, %o0 + retl + nop + +GENclear_page: +GENclear_user_page: + set PAGE_SIZE, %g7 +1: stx %g0, [%o0 + 0x00] + stx %g0, [%o0 + 0x08] + stx %g0, [%o0 + 0x10] + stx %g0, [%o0 + 0x18] + stx %g0, [%o0 + 0x20] + stx %g0, [%o0 + 0x28] + stx %g0, [%o0 + 0x30] + stx %g0, [%o0 + 0x38] + subcc %g7, 64, %g7 + bne,pt %xcc, 1b + add %o0, 64, %o0 + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define GEN_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl 
generic_patch_pageops + .type generic_patch_pageops,#function +generic_patch_pageops: + GEN_DO_PATCH(copy_user_page, GENcopy_user_page) + GEN_DO_PATCH(_clear_page, GENclear_page) + GEN_DO_PATCH(clear_user_page, GENclear_user_page) + retl + nop + .size generic_patch_pageops,.-generic_patch_pageops diff --git a/arch/sparc64/lib/GENpatch.S b/arch/sparc64/lib/GENpatch.S new file mode 100644 index 000000000000..fab9e89f16bd --- /dev/null +++ b/arch/sparc64/lib/GENpatch.S @@ -0,0 +1,33 @@ +/* GENpatch.S: Patch Ultra-I routines with generic variant. + * + * Copyright (C) 2007 David S. Miller + */ + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define GEN_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl generic_patch_copyops + .type generic_patch_copyops,#function +generic_patch_copyops: + GEN_DO_PATCH(memcpy, GENmemcpy) + GEN_DO_PATCH(___copy_from_user, GENcopy_from_user) + GEN_DO_PATCH(___copy_to_user, GENcopy_to_user) + retl + nop + .size generic_patch_copyops,.-generic_patch_copyops diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index c4a6d6e7d03c..f95fbfa3eeb8 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.25 2000/12/14 22:57:25 davem Exp $ +# # Makefile for Sparc64 library files.. 
# @@ -13,6 +13,8 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ NGpage.o NGbzero.o \ + GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o GENpatch.o \ + GENpage.o GENbzero.o \ copy_in_user.o user_fixup.o memmove.o \ mcount.o ipcsum.o rwsem.o xor.o diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h index 3f23c5dc5f21..86dc5c018a19 100644 --- a/include/asm-sparc64/oplib.h +++ b/include/asm-sparc64/oplib.h @@ -1,8 +1,7 @@ -/* $Id: oplib.h,v 1.14 2001/12/19 00:29:51 davem Exp $ - * oplib.h: Describes the interface and available routines in the +/* oplib.h: Describes the interface and available routines in the * Linux Prom library. * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net) * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ @@ -31,8 +30,10 @@ extern int prom_chosen_node; extern const char prom_peer_name[]; extern const char prom_compatible_name[]; extern const char prom_root_compatible[]; +extern const char prom_cpu_compatible[]; extern const char prom_finddev_name[]; extern const char prom_chosen_path[]; +extern const char prom_cpu_path[]; extern const char prom_getprop_name[]; extern const char prom_mmu_name[]; extern const char prom_callmethod_name[]; diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h index 23ad8a7987ad..cf7807813e85 100644 --- a/include/asm-sparc64/spitfire.h +++ b/include/asm-sparc64/spitfire.h @@ -38,6 +38,11 @@ #define L1DCACHE_SIZE 0x4000 +#define SUN4V_CHIP_INVALID 0x00 +#define SUN4V_CHIP_NIAGARA1 0x01 +#define SUN4V_CHIP_NIAGARA2 0x02 +#define SUN4V_CHIP_UNKNOWN 0xff + #ifndef __ASSEMBLY__ enum ultra_tlb_layout { @@ -49,6 +54,8 @@ enum ultra_tlb_layout { extern enum ultra_tlb_layout tlb_type; +extern int sun4v_chip_type; + extern int cheetah_pcache_forced_on; 
extern void cheetah_enable_pcache(void); diff --git a/include/asm-sparc64/xor.h b/include/asm-sparc64/xor.h index 8ce3f1813e28..a0233884fc94 100644 --- a/include/asm-sparc64/xor.h +++ b/include/asm-sparc64/xor.h @@ -63,4 +63,8 @@ static struct xor_block_template xor_block_niagara = { /* For VIS for everything except Niagara. */ #define XOR_SELECT_TEMPLATE(FASTEST) \ - (tlb_type == hypervisor ? &xor_block_niagara : &xor_block_VIS) + ((tlb_type == hypervisor && \ + (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \ + &xor_block_niagara : \ + &xor_block_VIS) -- cgit v1.2.3 From 4301065920b0cbde3986519582347e883b166f3e Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: simplify move_tasks() The move_tasks() function is currently multiplexed with two distinct capabilities: 1. attempt to move a specified amount of weighted load from one run queue to another; and 2. attempt to move a specified number of tasks from one run queue to another. The first of these capabilities is used in two places, load_balance() and load_balance_idle(), and in both of these cases the return value of move_tasks() is used purely to decide if tasks/load were moved and no notice of the actual number of tasks moved is taken. The second capability is used in exactly one place, active_load_balance(), to attempt to move exactly one task and, as before, the return value is only used as an indicator of success or failure. This multiplexing of sched_task() was introduced, by me, as part of the smpnice patches and was motivated by the fact that the alternative, one function to move specified load and one to move a single task, would have led to two functions of roughly the same complexity as the old move_tasks() (or the new balance_tasks()). However, the new modular design of the new CFS scheduler allows a simpler solution to be adopted and this patch addresses that solution by: 1. 
adding a new function, move_one_task(), to be used by active_load_balance(); and 2. making move_tasks() a single purpose function that tries to move a specified weighted load and returns 1 for success and 0 for failure. One of the consequences of these changes is that neither move_one_task() or the new move_tasks() care how many tasks sched_class.load_balance() moves and this enables its interface to be simplified by returning the amount of load moved as its result and removing the load_moved pointer from the argument list. This helps simplify the new move_tasks() and slightly reduces the amount of work done in each of sched_class.load_balance()'s implementations. Further simplification, e.g. changes to balance_tasks(), are possible but (slightly) complicated by the special needs of load_balance_fair() so I've left them to a later patch (if this one gets accepted). NB Since move_tasks() gets called with two run queue locks held even small reductions in overhead are worthwhile. [ mingo@elte.hu ] this change also reduces code size nicely: text data bss dec hex filename 39216 3618 24 42858 a76a sched.o.before 39173 3618 24 42815 a73f sched.o.after Signed-off-by: Peter Williams Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +-- kernel/sched.c | 82 +++++++++++++++++++++++++++---------------------- kernel/sched_fair.c | 8 ++--- kernel/sched_idletask.c | 4 +-- kernel/sched_rt.c | 9 +++--- 5 files changed, 58 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 17249fae5014..24bce423f10d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -866,11 +866,11 @@ struct sched_class { struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); - int (*load_balance) (struct rq *this_rq, int this_cpu, + unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, 
unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, unsigned long *total_load_moved); + int *all_pinned); void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p); diff --git a/kernel/sched.c b/kernel/sched.c index 4680f52974e3..42029634ef5a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2231,32 +2231,49 @@ out: } /* - * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted - * load from busiest to this_rq, as part of a balancing operation within - * "domain". Returns the number of tasks moved. + * move_tasks tries to move up to max_load_move weighted load from busiest to + * this_rq, as part of a balancing operation within domain "sd". + * Returns 1 if successful and 0 otherwise. * * Called with both runqueues locked. */ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_nr_move, unsigned long max_load_move, + unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned) { struct sched_class *class = sched_class_highest; - unsigned long load_moved, total_nr_moved = 0, nr_moved; - long rem_load_move = max_load_move; + unsigned long total_load_moved = 0; do { - nr_moved = class->load_balance(this_rq, this_cpu, busiest, - max_nr_move, (unsigned long)rem_load_move, - sd, idle, all_pinned, &load_moved); - total_nr_moved += nr_moved; - max_nr_move -= nr_moved; - rem_load_move -= load_moved; + total_load_moved += + class->load_balance(this_rq, this_cpu, busiest, + ULONG_MAX, max_load_move - total_load_moved, + sd, idle, all_pinned); class = class->next; - } while (class && max_nr_move && rem_load_move > 0); + } while (class && max_load_move > total_load_moved); - return total_nr_moved; + return total_load_moved > 0; +} + +/* + * move_one_task tries to move exactly one task from busiest to this_rq, as + * part of active balancing operations within "domain". 
+ * Returns 1 if successful and 0 otherwise. + * + * Called with both runqueues locked. + */ +static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, + struct sched_domain *sd, enum cpu_idle_type idle) +{ + struct sched_class *class; + + for (class = sched_class_highest; class; class = class->next) + if (class->load_balance(this_rq, this_cpu, busiest, + 1, ULONG_MAX, sd, idle, NULL)) + return 1; + + return 0; } /* @@ -2588,11 +2605,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ #define MAX_PINNED_INTERVAL 512 -static inline unsigned long minus_1_or_zero(unsigned long n) -{ - return n > 0 ? n - 1 : 0; -} - /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. @@ -2601,7 +2613,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, int *balance) { - int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; + int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; unsigned long imbalance; struct rq *busiest; @@ -2642,18 +2654,17 @@ redo: schedstat_add(sd, lb_imbalance[idle], imbalance); - nr_moved = 0; + ld_moved = 0; if (busiest->nr_running > 1) { /* * Attempt to move tasks. If find_busiest_group has found * an imbalance but busiest->nr_running <= 1, the group is - * still unbalanced. nr_moved simply stays zero, so it is + * still unbalanced. ld_moved simply stays zero, so it is * correctly treated as an imbalance. */ local_irq_save(flags); double_rq_lock(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), + ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, idle, &all_pinned); double_rq_unlock(this_rq, busiest); local_irq_restore(flags); @@ -2661,7 +2672,7 @@ redo: /* * some other cpu did the load balance for us. 
*/ - if (nr_moved && this_cpu != smp_processor_id()) + if (ld_moved && this_cpu != smp_processor_id()) resched_cpu(this_cpu); /* All tasks on this runqueue were pinned by CPU affinity */ @@ -2673,7 +2684,7 @@ redo: } } - if (!nr_moved) { + if (!ld_moved) { schedstat_inc(sd, lb_failed[idle]); sd->nr_balance_failed++; @@ -2722,10 +2733,10 @@ redo: sd->balance_interval *= 2; } - if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && + if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - return nr_moved; + return ld_moved; out_balanced: schedstat_inc(sd, lb_balanced[idle]); @@ -2757,7 +2768,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) struct sched_group *group; struct rq *busiest = NULL; unsigned long imbalance; - int nr_moved = 0; + int ld_moved = 0; int sd_idle = 0; int all_pinned = 0; cpumask_t cpus = CPU_MASK_ALL; @@ -2792,12 +2803,11 @@ redo: schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); - nr_moved = 0; + ld_moved = 0; if (busiest->nr_running > 1) { /* Attempt to move tasks */ double_lock_balance(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), + ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, CPU_NEWLY_IDLE, &all_pinned); spin_unlock(&busiest->lock); @@ -2809,7 +2819,7 @@ redo: } } - if (!nr_moved) { + if (!ld_moved) { schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) @@ -2817,7 +2827,7 @@ redo: } else sd->nr_balance_failed = 0; - return nr_moved; + return ld_moved; out_balanced: schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); @@ -2905,8 +2915,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) if (likely(sd)) { schedstat_inc(sd, alb_cnt); - if (move_tasks(target_rq, target_cpu, busiest_rq, 1, - ULONG_MAX, sd, CPU_IDLE, NULL)) + if 
(move_one_task(target_rq, target_cpu, busiest_rq, + sd, CPU_IDLE)) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 9f401588d509..7307a37cf26f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -944,11 +944,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) return p->prio; } -static int +static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, unsigned long *total_load_moved) + int *all_pinned) { struct cfs_rq *busy_cfs_rq; unsigned long load_moved, total_nr_moved = 0, nr_moved; @@ -1006,9 +1006,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, break; } - *total_load_moved = max_load_move - rem_load_move; - - return total_nr_moved; + return max_load_move - rem_load_move; } /* diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 41841e741c4a..1d8d9e13d950 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -37,11 +37,11 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now) { } -static int +static unsigned long load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, unsigned long *total_load_moved) + int *all_pinned) { return 0; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 002fcf8d3f64..2b0626a43cb8 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -172,15 +172,16 @@ static struct task_struct *load_balance_next_rt(void *arg) return p; } -static int +static unsigned long load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, 
unsigned long *load_moved) + int *all_pinned) { int this_best_prio, best_prio, best_prio_seen = 0; int nr_moved; struct rq_iterator rt_rq_iterator; + unsigned long load_moved; best_prio = sched_find_first_bit(busiest->rt.active.bitmap); this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap); @@ -203,11 +204,11 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, rt_rq_iterator.arg = busiest; nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, - max_load_move, sd, idle, all_pinned, load_moved, + max_load_move, sd, idle, all_pinned, &load_moved, this_best_prio, best_prio, best_prio_seen, &rt_rq_iterator); - return nr_moved; + return load_moved; } static void task_tick_rt(struct rq *rq, struct task_struct *p) -- cgit v1.2.3 From a4ac01c36e286dd1b9a1d5cd7422c5af51dc55f8 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: fix bug in balance_tasks() There are two problems with balance_tasks() and how it used: 1. The variables best_prio and best_prio_seen (inherited from the old move_tasks()) were only required to handle problems caused by the active/expired arrays, the order in which they were processed and the possibility that the task with the highest priority could be on either. These issues are no longer present and the extra overhead associated with their use is unnecessary (and possibly wrong). 2. In the absence of CONFIG_FAIR_GROUP_SCHED being set, the same this_best_prio variable needs to be used by all scheduling classes or there is a risk of moving too much load. E.g. 
if the highest priority task on this_rq at the beginning is a fairly low priority task and the rt class migrates a task (during its turn) then that moved task becomes the new highest priority task on this_rq but when the sched_fair class initializes its copy of this_best_prio it will get the priority of the original highest priority task as, due to the run queue locks being held, the reschedule triggered by pull_task() will not have taken place. This could result in inappropriate overriding of skip_for_load and excessive load being moved. The attached patch addresses these problems by deleting all references to best_prio and best_prio_seen and making this_best_prio a reference parameter to the various functions involved. load_balance_fair() has also been modified so that this_best_prio is only reset (in the loop) if CONFIG_FAIR_GROUP_SCHED is set. This should preserve the effect of helping spread groups' higher priority tasks around the available CPUs while improving system performance when CONFIG_FAIR_GROUP_SCHED isn't set.
Signed-off-by: Peter Williams Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 26 +++++++++++--------------- kernel/sched_fair.c | 32 ++++++++++++-------------------- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 19 ++----------------- 5 files changed, 27 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 24bce423f10d..513b81c60e87 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -870,7 +870,7 @@ struct sched_class { struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned); + int *all_pinned, int *this_best_prio); void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p); diff --git a/kernel/sched.c b/kernel/sched.c index 85b93118d244..1fa07c14624e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -745,8 +745,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator); + int *this_best_prio, struct rq_iterator *iterator); #include "sched_stats.h" #include "sched_rt.c" @@ -2165,8 +2164,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator) + int *this_best_prio, struct rq_iterator *iterator) { int pulled = 0, pinned = 0, skip_for_load; struct task_struct *p; @@ -2191,12 +2189,8 @@ next: */ skip_for_load = (p->se.load.weight >> 1) > rem_load_move + SCHED_LOAD_SCALE_FUZZ; - if (skip_for_load && p->prio < 
this_best_prio) - skip_for_load = !best_prio_seen && p->prio == best_prio; - if (skip_for_load || + if ((skip_for_load && p->prio >= *this_best_prio) || !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { - - best_prio_seen |= p->prio == best_prio; p = iterator->next(iterator->arg); goto next; } @@ -2210,8 +2204,8 @@ next: * and the prescribed amount of weighted load. */ if (pulled < max_nr_move && rem_load_move > 0) { - if (p->prio < this_best_prio) - this_best_prio = p->prio; + if (p->prio < *this_best_prio) + *this_best_prio = p->prio; p = iterator->next(iterator->arg); goto next; } @@ -2243,12 +2237,13 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, { struct sched_class *class = sched_class_highest; unsigned long total_load_moved = 0; + int this_best_prio = this_rq->curr->prio; do { total_load_moved += class->load_balance(this_rq, this_cpu, busiest, ULONG_MAX, max_load_move - total_load_moved, - sd, idle, all_pinned); + sd, idle, all_pinned, &this_best_prio); class = class->next; } while (class && max_load_move > total_load_moved); @@ -2266,10 +2261,12 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle) { struct sched_class *class; + int this_best_prio = MAX_PRIO; for (class = sched_class_highest; class; class = class->next) if (class->load_balance(this_rq, this_cpu, busiest, - 1, ULONG_MAX, sd, idle, NULL)) + 1, ULONG_MAX, sd, idle, NULL, + &this_best_prio)) return 1; return 0; @@ -3184,8 +3181,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator) + int *this_best_prio, struct rq_iterator *iterator) { *load_moved = 0; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 
16511e9e5528..923bed0b0c42 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -929,6 +929,7 @@ static struct task_struct *load_balance_next_fair(void *arg) return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); } +#ifdef CONFIG_FAIR_GROUP_SCHED static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) { struct sched_entity *curr; @@ -942,12 +943,13 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) return p->prio; } +#endif static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_nr_move, unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) + unsigned long max_nr_move, unsigned long max_load_move, + struct sched_domain *sd, enum cpu_idle_type idle, + int *all_pinned, int *this_best_prio) { struct cfs_rq *busy_cfs_rq; unsigned long load_moved, total_nr_moved = 0, nr_moved; @@ -958,10 +960,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, cfs_rq_iterator.next = load_balance_next_fair; for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { +#ifdef CONFIG_FAIR_GROUP_SCHED struct cfs_rq *this_cfs_rq; - long imbalance; + long imbalances; unsigned long maxload; - int this_best_prio, best_prio, best_prio_seen = 0; this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); @@ -975,27 +977,17 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, imbalance /= 2; maxload = min(rem_load_move, imbalance); - this_best_prio = cfs_rq_best_prio(this_cfs_rq); - best_prio = cfs_rq_best_prio(busy_cfs_rq); - - /* - * Enable handling of the case where there is more than one task - * with the best priority. If the current running task is one - * of those with prio==best_prio we know it won't be moved - * and therefore it's safe to override the skip (based on load) - * of any task we find with that prio. 
- */ - if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se) - best_prio_seen = 1; - + *this_best_prio = cfs_rq_best_prio(this_cfs_rq); +#else +#define maxload rem_load_move +#endif /* pass busy_cfs_rq argument into * load_balance_[start|next]_fair iterators */ cfs_rq_iterator.arg = busy_cfs_rq; nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, maxload, sd, idle, all_pinned, - &load_moved, this_best_prio, best_prio, - best_prio_seen, &cfs_rq_iterator); + &load_moved, this_best_prio, &cfs_rq_iterator); total_nr_moved += nr_moved; max_nr_move -= nr_moved; diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 1d8d9e13d950..dc9e1068911f 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -41,7 +41,7 @@ static unsigned long load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) + int *all_pinned, int *this_best_prio) { return 0; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 2b0626a43cb8..5b559e8c8aa6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -176,26 +176,12 @@ static unsigned long load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) + int *all_pinned, int *this_best_prio) { - int this_best_prio, best_prio, best_prio_seen = 0; int nr_moved; struct rq_iterator rt_rq_iterator; unsigned long load_moved; - best_prio = sched_find_first_bit(busiest->rt.active.bitmap); - this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap); - - /* - * Enable handling of the case where there is more than one task - * with the best priority. 
If the current running task is one - * of those with prio==best_prio we know it won't be moved - * and therefore it's safe to override the skip (based on load) - * of any task we find with that prio. - */ - if (busiest->curr->prio == best_prio) - best_prio_seen = 1; - rt_rq_iterator.start = load_balance_start_rt; rt_rq_iterator.next = load_balance_next_rt; /* pass 'busiest' rq argument into @@ -205,8 +191,7 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, max_load_move, sd, idle, all_pinned, &load_moved, - this_best_prio, best_prio, best_prio_seen, - &rt_rq_iterator); + this_best_prio, &rt_rq_iterator); return load_moved; } -- cgit v1.2.3 From 5cef9eca3837a8dcf605a360e213c4179a07c41a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:47 +0200 Subject: sched: remove the 'u64 now' parameter from print_cfs_rq() remove the 'u64 now' parameter from print_cfs_rq(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- kernel/sched_debug.c | 4 ++-- kernel/sched_fair.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 513b81c60e87..62ddddb49db3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -139,7 +139,7 @@ struct cfs_rq; extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); extern void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now); +print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #else static inline void proc_sched_show_task(struct task_struct *p, struct seq_file *m) @@ -149,7 +149,7 @@ static inline void proc_sched_set_task(struct task_struct *p) { } static inline void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) +print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { } #endif diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 8421b9399e10..f977ee53f8ce 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -106,7 +106,7 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) (long long)wait_runtime_rq_sum); } -void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) +void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq); @@ -166,7 +166,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) P(cpu_load[4]); #undef P - print_cfs_stats(m, cpu, now); + print_cfs_stats(m, cpu); print_rq(m, rq, cpu, now); } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index bcf5fc59e8e9..025ac532b27a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1108,12 +1108,12 @@ struct sched_class fair_sched_class __read_mostly = { }; #ifdef CONFIG_SCHED_DEBUG -static void print_cfs_stats(struct seq_file *m, int cpu, 
u64 now) +static void print_cfs_stats(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); struct cfs_rq *cfs_rq; for_each_leaf_cfs_rq(rq, cfs_rq) - print_cfs_rq(m, cpu, cfs_rq, now); + print_cfs_rq(m, cpu, cfs_rq); } #endif -- cgit v1.2.3 From fd390f6a04f22fb457d6fd1855964f79536525de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->enqueue_task() remove the 'u64 now' parameter from ->enqueue_task(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- kernel/sched.c | 2 +- kernel/sched_fair.c | 3 +-- kernel/sched_rt.c | 3 +-- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 62ddddb49db3..b11dedfbab6e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -855,8 +855,7 @@ struct sched_domain; struct sched_class { struct sched_class *next; - void (*enqueue_task) (struct rq *rq, struct task_struct *p, - int wakeup, u64 now); + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep, u64 now); void (*yield_task) (struct rq *rq, struct task_struct *p); diff --git a/kernel/sched.c b/kernel/sched.c index 49a5fb0cdea0..43ae1566b8fc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -852,7 +852,7 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) { sched_info_queued(p); - p->sched_class->enqueue_task(rq, p, wakeup, now); + p->sched_class->enqueue_task(rq, p, wakeup); p->se.on_rq = 1; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a11d18861a3c..81db9626b7ed 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -782,8 +782,7 @@ static inline int is_same_group(struct task_struct *curr, struct task_struct *p) * increased. 
Here we update the fair scheduling stats and * then put the task into the rbtree: */ -static void -enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now) +static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index fa5a46273b79..1edaa99e0d3d 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -25,8 +25,7 @@ static inline void update_curr_rt(struct rq *rq) curr->se.exec_start = rq->clock; } -static void -enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now) +static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) { struct rt_prio_array *array = &rq->rt.active; -- cgit v1.2.3 From f02231e51a280f1a0fee4d03ad8f50048e06cced Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->dequeue_task() remove the 'u64 now' parameter from ->dequeue_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- kernel/sched.c | 2 +- kernel/sched_fair.c | 3 +-- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 3 +-- 5 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b11dedfbab6e..c7815a6b70e0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -856,8 +856,7 @@ struct sched_class { struct sched_class *next; void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); - void (*dequeue_task) (struct rq *rq, struct task_struct *p, - int sleep, u64 now); + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); void (*yield_task) (struct rq *rq, struct task_struct *p); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); diff --git a/kernel/sched.c b/kernel/sched.c index 43ae1566b8fc..e51d75f4b4d7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -859,7 +859,7 @@ enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) { - p->sched_class->dequeue_task(rq, p, sleep, now); + p->sched_class->dequeue_task(rq, p, sleep); p->se.on_rq = 0; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 81db9626b7ed..fb4d614af2c3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -800,8 +800,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) * decreased. 
We remove the task from the rbtree and * update the fair scheduling stats: */ -static void -dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now) +static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index dc9e1068911f..f69e083e0d96 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -25,7 +25,7 @@ static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now) * message if some code attempts to do it: */ static void -dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now) +dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) { spin_unlock_irq(&rq->lock); printk(KERN_ERR "bad: scheduling from the idle thread!\n"); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 1edaa99e0d3d..60591e2512b1 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -36,8 +36,7 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) /* * Adding/removing a task to/from a priority array: */ -static void -dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now) +static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) { struct rt_prio_array *array = &rq->rt.active; -- cgit v1.2.3 From fb8d47240246e20f864f0724a23a7220cd1c59ac Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->pick_next_task() remove the 'u64 now' parameter from ->pick_next_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 4 ++-- kernel/sched_fair.c | 2 +- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index c7815a6b70e0..c6ad4071c791 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -861,7 +861,7 @@ struct sched_class { void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); - struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); + struct task_struct * (*pick_next_task) (struct rq *rq); void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, diff --git a/kernel/sched.c b/kernel/sched.c index e51d75f4b4d7..b67a288a0f1f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3410,14 +3410,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) * the fair class we can call that function directly: */ if (likely(rq->nr_running == rq->cfs.nr_running)) { - p = fair_sched_class.pick_next_task(rq, now); + p = fair_sched_class.pick_next_task(rq); if (likely(p)) return p; } class = sched_class_highest; for ( ; ; ) { - p = class->pick_next_task(rq, now); + p = class->pick_next_task(rq); if (p) return p; /* diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index fb4d614af2c3..0b23aaf074fa 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -859,7 +859,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p) __check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran); } -static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now) +static struct task_struct *pick_next_task_fair(struct rq *rq) { struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index f69e083e0d96..9f4c28f858fe 100644 --- a/kernel/sched_idletask.c +++ 
b/kernel/sched_idletask.c @@ -13,7 +13,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p) resched_task(rq->idle); } -static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now) +static struct task_struct *pick_next_task_idle(struct rq *rq) { schedstat_inc(rq, sched_goidle); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 60591e2512b1..c0b0d6237bb6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -73,7 +73,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) resched_task(rq->curr); } -static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now) +static struct task_struct *pick_next_task_rt(struct rq *rq) { struct rt_prio_array *array = &rq->rt.active; struct task_struct *next; -- cgit v1.2.3 From 31ee529cc2254e8b62880535ec8f21a4c5e1c091 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from ->put_prev_task() remove the 'u64 now' parameter from ->put_prev_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 2 +- kernel/sched_fair.c | 2 +- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index c6ad4071c791..9afb66a49358 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -862,7 +862,7 @@ struct sched_class { void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); struct task_struct * (*pick_next_task) (struct rq *rq); - void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); + void (*put_prev_task) (struct rq *rq, struct task_struct *p); unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, struct rq *busiest, diff --git a/kernel/sched.c b/kernel/sched.c index 4f9f9e9d7265..664440160485 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3470,7 +3470,7 @@ need_resched_nonpreemptible: if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); - prev->sched_class->put_prev_task(rq, prev, now); + prev->sched_class->put_prev_task(rq, prev); next = pick_next_task(rq, prev); sched_info_switch(prev, next); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0b23aaf074fa..103327b4275d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -878,7 +878,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) /* * Account for a descheduled task: */ -static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now) +static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) { struct sched_entity *se = &prev->se; struct cfs_rq *cfs_rq; diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 9f4c28f858fe..3503fb2d9f96 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -33,7 +33,7 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) spin_lock_irq(&rq->lock); } -static void put_prev_task_idle(struct rq *rq, struct 
task_struct *prev, u64 now) +static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c0b0d6237bb6..dcdcad632fd9 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -92,7 +92,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) return next; } -static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now) +static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { update_curr_rt(rq); p->se.exec_start = 0; -- cgit v1.2.3 From ee0827d8b5271094380410cf21d8c48c109a773a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from ->task_new() remove the 'u64 now' parameter from ->task_new(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 2 +- kernel/sched_fair.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9afb66a49358..682ef87da6eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -872,7 +872,7 @@ struct sched_class { void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p); - void (*task_new) (struct rq *rq, struct task_struct *p, u64 now); + void (*task_new) (struct rq *rq, struct task_struct *p); }; struct load_weight { diff --git a/kernel/sched.c b/kernel/sched.c index 664440160485..0619178efa01 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1672,7 +1672,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * Let the scheduling class do new task startup * management (if any): */ - p->sched_class->task_new(rq, p, now); + p->sched_class->task_new(rq, p); inc_nr_running(p, rq, now); } check_preempt_curr(rq, p); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 103327b4275d..4a2cbde1057f 
100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1020,7 +1020,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr) * monopolize the CPU. Note: the parent runqueue is locked, * the child is not running yet. */ -static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now) +static void task_new_fair(struct rq *rq, struct task_struct *p) { struct cfs_rq *cfs_rq = task_cfs_rq(p); struct sched_entity *se = &p->se; -- cgit v1.2.3 From 6a0ed91e361a93ee1efb4c20c4967024ed2a8dd7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 7 Aug 2007 23:43:14 +0300 Subject: hexdump: use const notation Trivial fix: mark the buffer to hexdump as const so callers could avoid casting their const buffers when calling print_hex_dump(). The patch is really trivial and I suggest to consider it as a fix (it fixes GCC warnings) and push it to current tree. Signed-off-by: Artem Bityutskiy Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- lib/hexdump.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4300bb462d29..b4f5b81b4257 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -224,7 +224,7 @@ extern void hex_dump_to_buffer(const void *buf, size_t len, char *linebuf, size_t linebuflen, bool ascii); extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, - void *buf, size_t len, bool ascii); + const void *buf, size_t len, bool ascii); extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, void *buf, size_t len); #define hex_asc(x) "0123456789abcdef"[x] diff --git a/lib/hexdump.c b/lib/hexdump.c index 473f5aed6cae..16f2e2935e87 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -145,9 +145,9 @@ EXPORT_SYMBOL(hex_dump_to_buffer); */ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, - void 
*buf, size_t len, bool ascii) + const void *buf, size_t len, bool ascii) { - u8 *ptr = buf; + const u8 *ptr = buf; int i, linelen, remaining = len; unsigned char linebuf[200]; -- cgit v1.2.3 From 620b5e68ee89ba1d3f017056857459dc21be8c7b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Aug 2007 13:17:08 -0700 Subject: Fix Alpha O_CLOEXEC definition The default definition in asm-generic conflicts with Alpha's O_DIRECT, so, like several other arches, it needs to be redefined. Signed-off-by: Richard Hendersion Signed-off-by: Linus Torvalds --- include/asm-alpha/fcntl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-alpha/fcntl.h b/include/asm-alpha/fcntl.h index 87f2cf459e26..25da0017ec87 100644 --- a/include/asm-alpha/fcntl.h +++ b/include/asm-alpha/fcntl.h @@ -16,6 +16,7 @@ #define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ #define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */ #define O_NOATIME 04000000 +#define O_CLOEXEC 010000000 /* set close_on_exec */ #define F_GETLK 7 #define F_SETLK 8 -- cgit v1.2.3 From 322392646bc36ff4381861e6589c22c1abc20dbf Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 10 Aug 2007 02:37:01 +0900 Subject: sh: Add missing dma_sync_single_range_for_*(). The b44 build uses these, caught by allmodconfig: drivers/net/b44.c: In function `b44_sync_dma_desc_for_cpu': drivers/net/b44.c:159: error: implicit declaration of function `dma_sync_single_range_for_cpu' Follow the sparc64 change and stub them in. 
Reported-by: Andrew Morton Signed-off-by: Paul Mundt --- include/asm-sh/dma-mapping.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h index 6f492ac3fa13..84fefdaa01a5 100644 --- a/include/asm-sh/dma-mapping.h +++ b/include/asm-sh/dma-mapping.h @@ -160,6 +160,25 @@ static inline void dma_sync_single_for_device(struct device *dev, dma_sync_single(dev, dma_handle, size, dir); } +static inline void dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction); +} + +static inline void dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_device(dev, dma_handle+offset, size, direction); +} + + static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) -- cgit v1.2.3 From 757e2d60c32c5baf1992219b3178da16868423b3 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 10 Aug 2007 02:47:31 +0900 Subject: sh64: Add missing dma_sync_single_for_*(). Follow the sparc64 and sh change. 
Signed-off-by: Paul Mundt --- include/asm-sh64/dma-mapping.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/asm-sh64/dma-mapping.h b/include/asm-sh64/dma-mapping.h index d505f357f819..de4309960207 100644 --- a/include/asm-sh64/dma-mapping.h +++ b/include/asm-sh64/dma-mapping.h @@ -141,6 +141,24 @@ static inline void dma_sync_single_for_device(struct device *dev, dma_sync_single(dev, dma_handle, size, dir); } +static inline void dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction); +} + +static inline void dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_device(dev, dma_handle+offset, size, direction); +} + static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) -- cgit v1.2.3 From f5996449e3244524cab0ba709a4bd87047a8175f Mon Sep 17 00:00:00 2001 From: Andre Detsch Date: Fri, 3 Aug 2007 18:53:46 -0700 Subject: [POWERPC] cell: Move SPU affinity init to spu_management_of_ops This patch moves affinity initialization code from spu_base.c to a new spu_management_of_ops function (init_affinity), which is empty in the case of PS3. This fixes a linking problem that was happening when compiling for PS3. Also, some small code style changes were made. 
Signed-off-by: Andre Detsch Signed-off-by: Geoff Levand Acked-by: Arnd Bergmann Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spu_base.c | 141 +------------------------- arch/powerpc/platforms/cell/spu_manage.c | 163 +++++++++++++++++++++++++++++++ arch/powerpc/platforms/ps3/spu.c | 6 ++ include/asm-powerpc/spu_priv1.h | 7 ++ 4 files changed, 177 insertions(+), 140 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 90124228b8f4..095a30304c56 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -36,7 +36,6 @@ #include #include #include -#include "spu_priv1_mmio.h" const struct spu_management_ops *spu_management_ops; EXPORT_SYMBOL_GPL(spu_management_ops); @@ -636,138 +635,6 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); -/* Hardcoded affinity idxs for QS20 */ -#define SPES_PER_BE 8 -static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; -static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; - -static struct spu *spu_lookup_reg(int node, u32 reg) -{ - struct spu *spu; - - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { - if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg) - return spu; - } - return NULL; -} - -static void init_aff_QS20_harcoded(void) -{ - int node, i; - struct spu *last_spu, *spu; - u32 reg; - - for (node = 0; node < MAX_NUMNODES; node++) { - last_spu = NULL; - for (i = 0; i < SPES_PER_BE; i++) { - reg = QS20_reg_idxs[i]; - spu = spu_lookup_reg(node, reg); - if (!spu) - continue; - spu->has_mem_affinity = QS20_reg_memory[reg]; - if (last_spu) - list_add_tail(&spu->aff_list, - &last_spu->aff_list); - last_spu = spu; - } - } -} - -static int of_has_vicinity(void) -{ - struct spu* spu; - - spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list); - return 
of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; -} - -static struct spu *aff_devnode_spu(int cbe, struct device_node *dn) -{ - struct spu *spu; - - list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) - if (spu_devnode(spu) == dn) - return spu; - return NULL; -} - -static struct spu * -aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid) -{ - struct spu *spu; - const phandle *vic_handles; - int lenp, i; - - list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { - if (spu_devnode(spu) == avoid) - continue; - vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp); - for (i=0; i < (lenp / sizeof(phandle)); i++) { - if (vic_handles[i] == target->linux_phandle) - return spu; - } - } - return NULL; -} - -static void init_aff_fw_vicinity_node(int cbe) -{ - struct spu *spu, *last_spu; - struct device_node *vic_dn, *last_spu_dn; - phandle avoid_ph; - const phandle *vic_handles; - const char *name; - int lenp, i, added, mem_aff; - - last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list); - avoid_ph = 0; - for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { - last_spu_dn = spu_devnode(last_spu); - vic_handles = get_property(last_spu_dn, "vicinity", &lenp); - - for (i = 0; i < (lenp / sizeof(phandle)); i++) { - if (vic_handles[i] == avoid_ph) - continue; - - vic_dn = of_find_node_by_phandle(vic_handles[i]); - if (!vic_dn) - continue; - - name = get_property(vic_dn, "name", NULL); - if (strcmp(name, "spe") == 0) { - spu = aff_devnode_spu(cbe, vic_dn); - avoid_ph = last_spu_dn->linux_phandle; - } - else { - mem_aff = strcmp(name, "mic-tm") == 0; - spu = aff_node_next_to(cbe, vic_dn, last_spu_dn); - if (!spu) - continue; - if (mem_aff) { - last_spu->has_mem_affinity = 1; - spu->has_mem_affinity = 1; - } - avoid_ph = vic_dn->linux_phandle; - } - list_add_tail(&spu->aff_list, &last_spu->aff_list); - last_spu = spu; - break; - } - } -} - -static void init_aff_fw_vicinity(void) -{ - int cbe; - 
- /* sets has_mem_affinity for each spu, as long as the - * spu->aff_list list, linking each spu to its neighbors - */ - for (cbe = 0; cbe < MAX_NUMNODES; cbe++) - init_aff_fw_vicinity_node(cbe); -} - static int __init init_spu_base(void) { int i, ret = 0; @@ -811,13 +678,7 @@ static int __init init_spu_base(void) mutex_unlock(&spu_full_list_mutex); spu_add_sysdev_attr(&attr_stat); - if (of_has_vicinity()) { - init_aff_fw_vicinity(); - } else { - long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) - init_aff_QS20_harcoded(); - } + spu_init_affinity(); return 0; diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 75ed50fcc3db..5eb88346181a 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -361,8 +361,171 @@ static int of_destroy_spu(struct spu *spu) return 0; } +/* Hardcoded affinity idxs for qs20 */ +#define QS20_SPES_PER_BE 8 +static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; +static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; + +static struct spu *spu_lookup_reg(int node, u32 reg) +{ + struct spu *spu; + u32 *spu_reg; + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + spu_reg = (u32*)of_get_property(spu_devnode(spu), "reg", NULL); + if (*spu_reg == reg) + return spu; + } + return NULL; +} + +static void init_affinity_qs20_harcoded(void) +{ + int node, i; + struct spu *last_spu, *spu; + u32 reg; + + for (node = 0; node < MAX_NUMNODES; node++) { + last_spu = NULL; + for (i = 0; i < QS20_SPES_PER_BE; i++) { + reg = qs20_reg_idxs[i]; + spu = spu_lookup_reg(node, reg); + if (!spu) + continue; + spu->has_mem_affinity = qs20_reg_memory[reg]; + if (last_spu) + list_add_tail(&spu->aff_list, + &last_spu->aff_list); + last_spu = spu; + } + } +} + +static int of_has_vicinity(void) +{ + struct spu* spu; + + spu = list_first_entry(&cbe_spu_info[0].spus, struct spu, 
cbe_list); + return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; +} + +static struct spu *devnode_spu(int cbe, struct device_node *dn) +{ + struct spu *spu; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) + if (spu_devnode(spu) == dn) + return spu; + return NULL; +} + +static struct spu * +neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) +{ + struct spu *spu; + struct device_node *spu_dn; + const phandle *vic_handles; + int lenp, i; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { + spu_dn = spu_devnode(spu); + if (spu_dn == avoid) + continue; + vic_handles = of_get_property(spu_dn, "vicinity", &lenp); + for (i=0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == target->linux_phandle) + return spu; + } + } + return NULL; +} + +static void init_affinity_node(int cbe) +{ + struct spu *spu, *last_spu; + struct device_node *vic_dn, *last_spu_dn; + phandle avoid_ph; + const phandle *vic_handles; + const char *name; + int lenp, i, added; + + last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu, + cbe_list); + avoid_ph = 0; + for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { + last_spu_dn = spu_devnode(last_spu); + vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp); + + /* + * Walk through each phandle in vicinity property of the spu + * (typically two vicinity phandles per spe node) + */ + for (i = 0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == avoid_ph) + continue; + + vic_dn = of_find_node_by_phandle(vic_handles[i]); + if (!vic_dn) + continue; + + /* a neighbour might be spe, mic-tm, or bif0 */ + name = of_get_property(vic_dn, "name", NULL); + if (!name) + continue; + + if (strcmp(name, "spe") == 0) { + spu = devnode_spu(cbe, vic_dn); + avoid_ph = last_spu_dn->linux_phandle; + } else { + /* + * "mic-tm" and "bif0" nodes do not have + * vicinity property.
So we need to find the + * spe which has vic_dn as neighbour, but + * skipping the one we came from (last_spu_dn) + */ + spu = neighbour_spu(cbe, vic_dn, last_spu_dn); + if (!spu) + continue; + if (!strcmp(name, "mic-tm")) { + last_spu->has_mem_affinity = 1; + spu->has_mem_affinity = 1; + } + avoid_ph = vic_dn->linux_phandle; + } + + list_add_tail(&spu->aff_list, &last_spu->aff_list); + last_spu = spu; + break; + } + } +} + +static void init_affinity_fw(void) +{ + int cbe; + + for (cbe = 0; cbe < MAX_NUMNODES; cbe++) + init_affinity_node(cbe); +} + +static int __init init_affinity(void) +{ + if (of_has_vicinity()) { + init_affinity_fw(); + } else { + long root = of_get_flat_dt_root(); + if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + init_affinity_qs20_harcoded(); + else + printk("No affinity configuration found"); + } + + return 0; +} + const struct spu_management_ops spu_management_of_ops = { .enumerate_spus = of_enumerate_spus, .create_spu = of_create_spu, .destroy_spu = of_destroy_spu, + .init_affinity = init_affinity, }; diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 502d80ed982b..ac2a4b8a4c14 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -414,10 +414,16 @@ static int __init ps3_enumerate_spus(int (*fn)(void *data)) return num_resource_id; } +static int ps3_init_affinity(void) +{ + return 0; +} + const struct spu_management_ops spu_management_ps3_ops = { .enumerate_spus = ps3_enumerate_spus, .create_spu = ps3_create_spu, .destroy_spu = ps3_destroy_spu, + .init_affinity = ps3_init_affinity, }; /* spu_priv1_ops */ diff --git a/include/asm-powerpc/spu_priv1.h b/include/asm-powerpc/spu_priv1.h index 7e78f6a1ab8b..0f37c7c90820 100644 --- a/include/asm-powerpc/spu_priv1.h +++ b/include/asm-powerpc/spu_priv1.h @@ -178,6 +178,7 @@ struct spu_management_ops { int (*enumerate_spus)(int (*fn)(void *data)); int (*create_spu)(struct spu *spu, void *data); int (*destroy_spu)(struct spu 
*spu); + int (*init_affinity)(void); }; extern const struct spu_management_ops* spu_management_ops; @@ -200,6 +201,12 @@ spu_destroy_spu (struct spu *spu) return spu_management_ops->destroy_spu(spu); } +static inline int +spu_init_affinity (void) +{ + return spu_management_ops->init_affinity(); +} + /* * The declarations folowing are put here for convenience * and only intended to be used by the platform setup code. -- cgit v1.2.3 From 8f2ea1fd3f97ab7a809e939b5b9005a16f862439 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 7 Aug 2007 08:05:10 +1000 Subject: [POWERPC] Fix initialization and usage of dma_mask powerpc has a couple of bugs in the usage of dma_masks that tend to break when drivers explicitly try to set a 32-bit mask for example. First, the code that generates the pci devices from the OF device-tree doesn't initialize the mask properly, then our implementation of set_dma_mask() was trying to validate the -previous- mask value, not the one passed in as an argument. This fixes these problems. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/pci_64.c | 1 + include/asm-powerpc/dma-mapping.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index a97e23ac1976..291ffbc360c9 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -313,6 +313,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->current_state = 4; /* unknown power state */ dev->error_state = pci_channel_io_normal; + dev->dma_mask = 0xffffffff; if (!strcmp(type, "pci") || !strcmp(type, "pciex")) { /* a PCI-PCI bridge */ diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h index f6bd804d9090..744d6bb24116 100644 --- a/include/asm-powerpc/dma-mapping.h +++ b/include/asm-powerpc/dma-mapping.h @@ -95,7 +95,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) return -EIO; if (dma_ops->set_dma_mask != NULL) return dma_ops->set_dma_mask(dev, dma_mask); - if (!dev->dma_mask || !dma_supported(dev, *dev->dma_mask)) + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; *dev->dma_mask = dma_mask; return 0; -- cgit v1.2.3 From 3ac4c949e02f26be1e4378f9acfb07ec87db947b Mon Sep 17 00:00:00 2001 From: Robert Reif Date: Fri, 10 Aug 2007 15:52:06 -0700 Subject: [SPARC32]: Remove iommu from struct sbus_bus and use archdata like sparc64. Signed-off-by: Robert Reif Signed-off-by: David S. 
Miller --- arch/sparc/kernel/ebus.c | 1 + arch/sparc/mm/io-unit.c | 18 +++++++++--------- arch/sparc/mm/iommu.c | 12 ++++++------ include/asm-sparc/sbus.h | 1 - 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c index ac352eb6dff3..e2d02fd13f35 100644 --- a/arch/sparc/kernel/ebus.c +++ b/arch/sparc/kernel/ebus.c @@ -238,6 +238,7 @@ void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_device *d sd = &dev->ofdev.dev.archdata; sd->prom_node = dp; sd->op = &dev->ofdev; + sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu; dev->ofdev.node = dp; dev->ofdev.dev.parent = &dev->bus->ofdev.dev; diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 4ccda77d08d6..7c89893b1fe8 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -66,7 +66,7 @@ iounit_init(int sbi_node, int io_node, struct sbus_bus *sbus) } if(!xpt) panic("Cannot map External Page Table."); - sbus->iommu = (struct iommu_struct *)iounit; + sbus->ofdev.dev.archdata.iommu = iounit; iounit->page_table = xpt; spin_lock_init(&iounit->lock); @@ -127,7 +127,7 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); static __u32 iounit_get_scsi_one(char *vaddr, unsigned long len, struct sbus_bus *sbus) { unsigned long ret, flags; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); ret = iounit_get_area(iounit, (unsigned long)vaddr, len); @@ -138,7 +138,7 @@ static __u32 iounit_get_scsi_one(char *vaddr, unsigned long len, struct sbus_bus static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *sbus) { unsigned long flags; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; /* FIXME: Cache some resolved pages - often several sg entries are to the 
same page */ spin_lock_irqsave(&iounit->lock, flags); @@ -153,7 +153,7 @@ static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus static void iounit_release_scsi_one(__u32 vaddr, unsigned long len, struct sbus_bus *sbus) { unsigned long flags; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); len = ((vaddr & ~PAGE_MASK) + len + (PAGE_SIZE-1)) >> PAGE_SHIFT; @@ -168,7 +168,7 @@ static void iounit_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_ { unsigned long flags; unsigned long vaddr, len; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); while (sz != 0) { @@ -211,7 +211,7 @@ static int iounit_map_dma_area(dma_addr_t *pba, unsigned long va, __u32 addr, in i = ((addr - IOUNIT_DMA_BASE) >> PAGE_SHIFT); for_each_sbus(sbus) { - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; iopte = (iopte_t *)(iounit->page_table + i); *iopte = MKIOPTE(__pa(page)); @@ -235,7 +235,7 @@ static void iounit_unmap_dma_area(unsigned long addr, int len) static struct page *iounit_translate_dvma(unsigned long addr) { struct sbus_bus *sbus = sbus_root; /* They are all the same */ - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; int i; iopte_t *iopte; @@ -279,7 +279,7 @@ __u32 iounit_map_dma_init(struct sbus_bus *sbus, int size) unsigned long rotor, scan, limit; unsigned long flags; __u32 ret; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; npages = (size + (PAGE_SIZE-1)) >> PAGE_SHIFT; i = 0x0213; @@ -315,7 +315,7 @@ nexti: scan = 
find_next_zero_bit(iounit->bmap, limit, scan); __u32 iounit_map_dma_page(__u32 vaddr, void *addr, struct sbus_bus *sbus) { int scan = (vaddr - IOUNIT_DMA_BASE) >> PAGE_SHIFT; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; iounit->page_table[scan] = MKIOPTE(__pa(((unsigned long)addr) & PAGE_MASK)); return vaddr + (((unsigned long)addr) & ~PAGE_MASK); diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index be042efd1ba4..52e907af9d29 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -132,7 +132,7 @@ iommu_init(int iommund, struct sbus_bus *sbus) impl, vers, iommu->page_table, (int)(IOMMU_NPTES*sizeof(iopte_t)), (int)IOMMU_NPTES); - sbus->iommu = iommu; + sbus->ofdev.dev.archdata.iommu = iommu; } /* This begs to be btfixup-ed by srmmu. */ @@ -166,7 +166,7 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte) static u32 iommu_get_one(struct page *page, int npages, struct sbus_bus *sbus) { - struct iommu_struct *iommu = sbus->iommu; + struct iommu_struct *iommu = sbus->ofdev.dev.archdata.iommu; int ioptex; iopte_t *iopte, *iopte0; unsigned int busa, busa0; @@ -291,7 +291,7 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu static void iommu_release_one(u32 busa, int npages, struct sbus_bus *sbus) { - struct iommu_struct *iommu = sbus->iommu; + struct iommu_struct *iommu = sbus->ofdev.dev.archdata.iommu; int ioptex; int i; @@ -334,7 +334,7 @@ static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va, unsigned long addr, int len) { unsigned long page, end; - struct iommu_struct *iommu = sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; iopte_t *first; int ioptex; @@ -399,7 +399,7 @@ static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va, static void iommu_unmap_dma_area(unsigned long busa, int len) { - struct iommu_struct *iommu 
= sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; unsigned long end; int ioptex = (busa - iommu->start) >> PAGE_SHIFT; @@ -420,7 +420,7 @@ static void iommu_unmap_dma_area(unsigned long busa, int len) static struct page *iommu_translate_dvma(unsigned long busa) { - struct iommu_struct *iommu = sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; iopte += ((busa - iommu->start) >> PAGE_SHIFT); diff --git a/include/asm-sparc/sbus.h b/include/asm-sparc/sbus.h index d036e4419d79..27d076c46964 100644 --- a/include/asm-sparc/sbus.h +++ b/include/asm-sparc/sbus.h @@ -68,7 +68,6 @@ struct sbus_dev { /* This struct describes the SBus(s) found on this machine. */ struct sbus_bus { struct of_device ofdev; - void *iommu; /* Opaque IOMMU cookie */ struct sbus_dev *devices; /* Link to devices on this SBus */ struct sbus_bus *next; /* next SBus, if more than one SBus */ int prom_node; /* PROM device tree node for this SBus */ -- cgit v1.2.3 From ec05b297f91a443aa26b74059b573bfad49c9ebb Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 30 Jul 2007 08:24:27 +0200 Subject: [PATCH] remove mm/filemap.c:file_send_actor() This patch removes the no longer used file_send_actor(). 
Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 - mm/filemap.c | 20 -------------------- 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6bf139562947..16421f662a7a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1659,7 +1659,6 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); -extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); diff --git a/mm/filemap.c b/mm/filemap.c index 6cf700d41844..50021a60d01f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1218,26 +1218,6 @@ out: } EXPORT_SYMBOL(generic_file_aio_read); -int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) -{ - ssize_t written; - unsigned long count = desc->count; - struct file *file = desc->arg.data; - - if (size > count) - size = count; - - written = file->f_op->sendpage(file, page, offset, - size, &file->f_pos, size<count); - if (written < 0) { - desc->error = written; - written = 0; - } - desc->count = count - written; - desc->written += written; - return written; -} - static ssize_t do_readahead(struct address_space *mapping, struct file *filp, unsigned long index, unsigned long nr) -- cgit v1.2.3 From c7149d6bce2561aeaa48caaa1700aa8b3b22008f Mon Sep 17 00:00:00 2001 From: "Alan D.
Brunelle" Date: Tue, 7 Aug 2007 15:30:23 +0200 Subject: Fix remap handling by blktrace This patch provides more information concerning REMAP operations on block IOs. The additional information provides clearer details at the user level, and supports post-processing analysis in btt. o Adds in partition remaps on the same device. o Fixed up the remap information in DM to be in the right order o Sent up mapped-from and mapped-to device information Signed-off-by: Alan D. Brunelle Signed-off-by: Jens Axboe --- block/ll_rw_blk.c | 4 ++++ drivers/md/dm.c | 4 ++-- include/linux/blktrace_api.h | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 8c2caff87cc3..a15845c164f2 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3047,6 +3047,10 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; + + blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, + bdev->bd_dev, bio->bi_sector, + bio->bi_sector - p->start_sect); } } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 141ff9fa296e..2120155929a6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -580,8 +580,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* the bio has been remapped so dispatch it */ blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, sector, - clone->bi_sector); + tio->io->bio->bi_bdev->bd_dev, + clone->bi_sector, sector); generic_make_request(clone); } else if (r < 0 || r == DM_MAPIO_REQUEUE) { diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 90874a5d7d78..7b5d56b82b59 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -105,7 +105,7 @@ struct blk_io_trace { */ struct blk_io_trace_remap { __be32 device; - u32 __pad; + __be32 device_from; __be64 sector; }; @@ -272,6 +272,7 @@ static inline void blk_add_trace_remap(struct 
request_queue *q, struct bio *bio, return; r.device = cpu_to_be32(dev); + r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); r.sector = cpu_to_be64(to); __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); -- cgit v1.2.3 From 02a5e0acb3cb85d80d0fe834e366d38a92bbaa22 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 11 Aug 2007 22:34:32 +0200 Subject: BLOCK: Hide the contents of linux/bio.h if CONFIG_BLOCK=n Hide the contents of linux/bio.h if CONFIG_BLOCK=n as there shouldn't be compiled code that uses it. Signed-off-by: David Howells Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 4d85262b4fa4..1ddef34f43c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -24,6 +24,8 @@ #include #include +#ifdef CONFIG_BLOCK + /* Platforms may set this to teach the BIO layer about IOMMU hardware. */ #include @@ -361,4 +363,5 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +#endif /* CONFIG_BLOCK */ #endif /* __LINUX_BIO_H */ -- cgit v1.2.3 From 60c9834238482f805b1d9e4dc2a780405ddab80f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 Aug 2007 13:00:50 -0700 Subject: FRV: connect up fallocate Connect up the fallocate() system call. 
Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/entry.S | 1 + include/asm-frv/unistd.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index 275673c192aa..1e74f3c5cee2 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1496,6 +1496,7 @@ sys_call_table: .long sys_signalfd .long sys_timerfd .long sys_eventfd + .long sys_fallocate syscall_table_size = (. - sys_call_table) diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h index 7306c71a8926..cd84f1771e34 100644 --- a/include/asm-frv/unistd.h +++ b/include/asm-frv/unistd.h @@ -330,10 +330,11 @@ #define __NR_signalfd 321 #define __NR_timerfd 322 #define __NR_eventfd 323 +#define __NR_fallocate 324 #ifdef __KERNEL__ -#define NR_syscalls 324 +#define NR_syscalls 325 #define __ARCH_WANT_IPC_PARSE_VERSION /* #define __ARCH_WANT_OLD_READDIR */ -- cgit v1.2.3 From 76ceb2f90f6efb6d1f3d88f855428bff947a3483 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 10 Aug 2007 13:00:55 -0700 Subject: Remove unused struct proc_dir_entry::set After /proc/sys rewrite it was left unused. Signed-off-by: Alexey Dobriyan Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 28e3664fdf1b..cd13a78c5db8 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -75,7 +75,6 @@ struct proc_dir_entry { write_proc_t *write_proc; atomic_t count; /* use count */ int deleted; /* delete flag */ - void *set; int pde_users; /* number of callers into module in progress */ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ struct completion *pde_unload_completion; -- cgit v1.2.3 From 42fd552e8647316757ded0176466c41d17934dcf Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 10 Aug 2007 13:01:05 -0700 Subject: fix serial buffer memory leak Patch c5c34d4862e18ef07c1276d233507f540fb5a532 (tty: flush flip buffer on ldisc input queue flush) introduces a race condition which can lead to memory leaks. The problem can be triggered when tcflush() is called when data are being pushed to the line discipline driver by flush_to_ldisc(). flush_to_ldisc() releases tty->buf.lock when calling the line discipline receive_buf function. At that point tty_buffer_flush() kicks in and sets both tty->buf.head and tty->buf.tail to NULL. When flush_to_ldisc() finishes, it restores tty->buf.head but doesn't touch tty->buf.tail. This corrupts the buffer queue, and the next call to tty_buffer_request_room() will allocate a new buffer and overwrite tty->buf.head. The previous buffer is then lost forever without being released. (Thanks to Laurent for the above text, for finding, diagnosing and reporting the bug) - Use tty->flags bits for the flush status.
- Wait for the flag to clear again before returning - Fix the doc error noted - Fix flush of empty queue leaving stale flushpending [akpm@linux-foundation.org: cleanup] Signed-off-by: Alan Cox Acked-by: Paul Fulghum Cc: Laurent Pinchart Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 56 +++++++++++++++++++++++++++++++++++++++++++++------ include/linux/tty.h | 2 ++ 2 files changed, 52 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index de37ebc3a4cf..51ea93cab6c4 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -369,25 +369,54 @@ static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b) } /** - * tty_buffer_flush - flush full tty buffers + * __tty_buffer_flush - flush full tty buffers * @tty: tty to flush * - * flush all the buffers containing receive data + * flush all the buffers containing receive data. Caller must + * hold the buffer lock and must have ensured no parallel flush to + * ldisc is running. * - * Locking: none + * Locking: Caller must hold tty->buf.lock */ -static void tty_buffer_flush(struct tty_struct *tty) +static void __tty_buffer_flush(struct tty_struct *tty) { struct tty_buffer *thead; - unsigned long flags; - spin_lock_irqsave(&tty->buf.lock, flags); while((thead = tty->buf.head) != NULL) { tty->buf.head = thead->next; tty_buffer_free(tty, thead); } tty->buf.tail = NULL; +} + +/** + * tty_buffer_flush - flush full tty buffers + * @tty: tty to flush + * + * flush all the buffers containing receive data. If the buffer is + * being processed by flush_to_ldisc then we defer the processing + * to that function + * + * Locking: none + */ + +static void tty_buffer_flush(struct tty_struct *tty) +{ + unsigned long flags; + spin_lock_irqsave(&tty->buf.lock, flags); + + /* If the data is being pushed to the tty layer then we can't + process it here. 
Instead set a flag and the flush_to_ldisc + path will process the flush request before it exits */ + if (test_bit(TTY_FLUSHING, &tty->flags)) { + set_bit(TTY_FLUSHPENDING, &tty->flags); + spin_unlock_irqrestore(&tty->buf.lock, flags); + wait_event(tty->read_wait, + test_bit(TTY_FLUSHPENDING, &tty->flags) == 0); + return; + } else + __tty_buffer_flush(tty); spin_unlock_irqrestore(&tty->buf.lock, flags); } @@ -3594,6 +3623,7 @@ static void flush_to_ldisc(struct work_struct *work) return; spin_lock_irqsave(&tty->buf.lock, flags); + set_bit(TTY_FLUSHING, &tty->flags); /* So we know a flush is running */ head = tty->buf.head; if (head != NULL) { tty->buf.head = NULL; @@ -3607,6 +3637,11 @@ static void flush_to_ldisc(struct work_struct *work) tty_buffer_free(tty, tbuf); continue; } + /* Ldisc or user is trying to flush the buffers + we are feeding to the ldisc, stop feeding the + line discipline as we want to empty the queue */ + if (test_bit(TTY_FLUSHPENDING, &tty->flags)) + break; if (!tty->receive_room) { schedule_delayed_work(&tty->buf.work, 1); break; @@ -3620,8 +3655,17 @@ static void flush_to_ldisc(struct work_struct *work) disc->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); } + /* Restore the queue head */ tty->buf.head = head; } + /* We may have a deferred request to flush the input buffer, + if so pull the chain under the lock and empty the queue */ + if (test_bit(TTY_FLUSHPENDING, &tty->flags)) { + __tty_buffer_flush(tty); + clear_bit(TTY_FLUSHPENDING, &tty->flags); + wake_up(&tty->read_wait); + } + clear_bit(TTY_FLUSHING, &tty->flags); spin_unlock_irqrestore(&tty->buf.lock, flags); tty_ldisc_deref(disc); diff --git a/include/linux/tty.h b/include/linux/tty.h index 691a1748d9d2..6570719eafdf 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -274,6 +274,8 @@ struct tty_struct { #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 
18 /* Post driver->hangup() */ +#define TTY_FLUSHING 19 /* Flushing to ldisc in progress */ +#define TTY_FLUSHPENDING 20 /* Queued buffer flush pending */ #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) -- cgit v1.2.3 From eb9a9a56316f4fea98ee32873ccbf7098b7bd69b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 10 Aug 2007 13:01:07 -0700 Subject: hex_dump: add missing "const" qualifiers Add missing "const" qualifiers to the print_hex_dump_bytes() library routines. (akpm: rumoured to fix some compile warning somewhere) Signed-off-by: Alan Stern Cc: Artem Bityutskiy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- lib/hexdump.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b4f5b81b4257..f592df74b3cf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -226,7 +226,7 @@ extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, - void *buf, size_t len); + const void *buf, size_t len); #define hex_asc(x) "0123456789abcdef"[x] #ifdef DEBUG diff --git a/lib/hexdump.c b/lib/hexdump.c index 16f2e2935e87..bd5edaeaa80b 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -189,7 +189,7 @@ EXPORT_SYMBOL(print_hex_dump); * rowsize of 16, groupsize of 1, and ASCII output included. 
*/ void print_hex_dump_bytes(const char *prefix_str, int prefix_type, - void *buf, size_t len) + const void *buf, size_t len) { print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, buf, len, 1); -- cgit v1.2.3 From 844add7abca0d10e9733fc16119e53cb4c1987b4 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 10 Aug 2007 13:01:07 -0700 Subject: RCU: Remove prototype for nonexistent function synchronize_idle() synchronize_idle() sounds like an interesting function, but we don't actually have it, so don't prototype it. Introduced in commit 9b06e818985d139fd9e82c28297f7744e1b484e1, in 2005. Signed-off-by: Josh Triplett Acked-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c6b7485eac7c..fe17d7d750c2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -281,7 +281,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, extern void FASTCALL(call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head))); extern void synchronize_rcu(void); -void synchronize_idle(void); extern void rcu_barrier(void); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 6ddfca9548d8ecc26096a30667423ba919109533 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 10 Aug 2007 13:01:09 -0700 Subject: timer: remove clockevents_unregister_notifier I find a function(clockevents_unregister_notifier) which is not called by anything in tree. 
Signed-off-by: Miao Xie Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clockchips.h | 1 - kernel/time/clockevents.c | 10 ---------- 2 files changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index e0bd46eb2414..def5a659b8a5 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -123,7 +123,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old, extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); extern int clockevents_register_notifier(struct notifier_block *nb); -extern void clockevents_unregister_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ktime_t now); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 2ad1c37b8dfe..41dd3105ce7f 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -113,16 +113,6 @@ int clockevents_register_notifier(struct notifier_block *nb) return ret; } -/** - * clockevents_unregister_notifier - unregister a clock events change listener - */ -void clockevents_unregister_notifier(struct notifier_block *nb) -{ - spin_lock(&clockevents_lock); - raw_notifier_chain_unregister(&clockevents_chain, nb); - spin_unlock(&clockevents_lock); -} - /* * Notify about a clock event change. Called with clockevents_lock * held. -- cgit v1.2.3 From 73c59afc65cfa50c3362b9ce1ec151a79c41dd8e Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Fri, 10 Aug 2007 13:01:19 -0700 Subject: finish i386 and x86-64 sysdata conversion This patch finishes the i386 and x86-64 ->sysdata conversion and hopefully also fixes Riku's and Andy's observed bugs. It is based on Yinghai Lu's and Andy Whitcroft's patches (thanks!) with some changes: - introduce pci_scan_bus_with_sysdata() and use it instead of pci_scan_bus() where appropriate. 
pci_scan_bus_with_sysdata() will allocate the sysdata structure and then call pci_scan_bus(). - always allocate pci_sysdata dynamically. The whole point of this sysdata work is to make it easy to do root-bus specific things (e.g., support PCI domains and IOMMU's). I dislike using a default struct pci_sysdata in some places and a dynamically allocated pci_sysdata elsewhere - the potential for someone inadvertently changing the default structure is too high. - this patch only makes the minimal changes necessary, i.e., the NUMA node is always initialized to -1. Patches to do the right thing with regards to the NUMA node can build on top of this (either add a 'node' parameter to pci_scan_bus_with_sysdata() or just update the node when it becomes known). The patch was compile tested with various configurations (e.g., NUMAQ, VISWS) and run-time tested on i386 and x86-64. Unfortunately none of my machines exhibited the bugs so caveat emptor. Andy, could you please see if this fixes the NUMA issues you've seen? Riku, does this fix "pci=noacpi" on your laptop?
Signed-off-by: Muli Ben-Yehuda Cc: Yinghai Lu Cc: Andi Kleen Cc: Chuck Ebbert Cc: Cc: Andy Whitcroft Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/pci/common.c | 23 +++++++++++++++++++++++ arch/i386/pci/fixup.c | 6 +++--- arch/i386/pci/irq.c | 5 +++-- arch/i386/pci/legacy.c | 2 +- arch/i386/pci/numa.c | 15 +++++++++------ arch/i386/pci/visws.c | 4 ++-- include/asm-i386/pci.h | 3 +++ include/asm-x86_64/pci.h | 2 ++ 8 files changed, 46 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index 85503deeda46..ebc6f3c66340 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -455,3 +455,26 @@ void pcibios_disable_device (struct pci_dev *dev) if (!dev->msi_enabled && pcibios_disable_irq) pcibios_disable_irq(dev); } + +struct pci_bus *pci_scan_bus_with_sysdata(int busno) +{ + struct pci_bus *bus = NULL; + struct pci_sysdata *sd; + + /* + * Allocate per-root-bus (not per bus) arch-specific data. + * TODO: leak; this memory is never freed. + * It's arguable whether it's worth the trouble to care. 
+ */ + sd = kzalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) { + printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); + return NULL; + } + sd->node = -1; + bus = pci_scan_bus(busno, &pci_root_ops, sd); + if (!bus) + kfree(sd); + + return bus; +} diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index e7306dbf6c42..c82cbf4c7226 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -25,9 +25,9 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) pci_read_config_byte(d, reg++, &subb); DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); if (busno) - pci_scan_bus(busno, &pci_root_ops, NULL); /* Bus A */ + pci_scan_bus_with_sysdata(busno); /* Bus A */ if (suba < subb) - pci_scan_bus(suba+1, &pci_root_ops, NULL); /* Bus B */ + pci_scan_bus_with_sysdata(suba+1); /* Bus B */ } pcibios_last_bus = -1; } @@ -42,7 +42,7 @@ static void __devinit pci_fixup_i450gx(struct pci_dev *d) u8 busno; pci_read_config_byte(d, 0x4a, &busno); printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno); - pci_scan_bus(busno, &pci_root_ops, NULL); + pci_scan_bus_with_sysdata(busno); pcibios_last_bus = -1; } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index f2cb942f8281..665db063a40a 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -138,8 +138,9 @@ static void __init pirq_peer_trick(void) for(i = 1; i < 256; i++) { if (!busmap[i] || pci_find_bus(0, i)) continue; - if (pci_scan_bus(i, &pci_root_ops, NULL)) - printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); + if (pci_scan_bus_with_sysdata(i)) + printk(KERN_INFO "PCI: Discovered primary peer " + "bus %02x [IRQ]\n", i); } pcibios_last_bus = -1; } diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c index 149a9588c256..5565d7016b75 100644 --- a/arch/i386/pci/legacy.c +++ b/arch/i386/pci/legacy.c @@ -26,7 +26,7 @@ static void 
__devinit pcibios_fixup_peer_bridges(void) l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus(n, &pci_root_ops, NULL); + pci_scan_bus_with_sysdata(n); break; } } diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c index adbe17a38f6f..f5f165f69e0c 100644 --- a/arch/i386/pci/numa.c +++ b/arch/i386/pci/numa.c @@ -96,10 +96,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) pci_read_config_byte(d, reg++, &suba); pci_read_config_byte(d, reg++, &subb); DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); - if (busno) - pci_scan_bus(QUADLOCAL2BUS(quad,busno), &pci_root_ops, NULL); /* Bus A */ - if (suba < subb) - pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), &pci_root_ops, NULL); /* Bus B */ + if (busno) { + /* Bus A */ + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); + } + if (suba < subb) { + /* Bus B */ + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1)); + } } pcibios_last_bus = -1; } @@ -123,8 +127,7 @@ static int __init pci_numa_init(void) continue; printk("Scanning PCI bus %d for quad %d\n", QUADLOCAL2BUS(quad,0), quad); - pci_scan_bus(QUADLOCAL2BUS(quad,0), - &pci_root_ops, NULL); + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0)); } return 0; } diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c index f1b486d4190b..8ecb1c722594 100644 --- a/arch/i386/pci/visws.c +++ b/arch/i386/pci/visws.c @@ -101,8 +101,8 @@ static int __init pcibios_init(void) "bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0); raw_pci_ops = &pci_direct_conf1; - pci_scan_bus(pci_bus0, &pci_root_ops, NULL); - pci_scan_bus(pci_bus1, &pci_root_ops, NULL); + pci_scan_bus_with_sysdata(pci_bus0); + pci_scan_bus_with_sysdata(pci_bus1); pci_fixup_irqs(visws_swizzle, visws_map_irq); pcibios_resource_survey(); return 0; diff --git a/include/asm-i386/pci.h b/include/asm-i386/pci.h index d790343e9982..4fcacc711385 100644 --- a/include/asm-i386/pci.h +++ 
b/include/asm-i386/pci.h @@ -8,6 +8,9 @@ struct pci_sysdata { int node; /* NUMA node */ }; +/* scan a bus after allocating a pci_sysdata for it */ +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); + #include /* for struct page */ /* Can be used to override the logic in pci_scan_bus for skipping diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h index 88926eb44f5c..5da8cb0c0599 100644 --- a/include/asm-x86_64/pci.h +++ b/include/asm-x86_64/pci.h @@ -10,6 +10,8 @@ struct pci_sysdata { void* iommu; /* IOMMU private data */ }; +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); + #ifdef CONFIG_CALGARY_IOMMU static inline void* pci_iommu(struct pci_bus *bus) { -- cgit v1.2.3 From 9535239f6bc99f68e0cfae44505ad402b53ed24c Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Fri, 10 Aug 2007 13:01:20 -0700 Subject: changing include/asm-generic/pgtable.h for non-mmu There are some parts of include/asm-generic/pgtable.h that are relevant to the non-mmu architectures. To make it easier to include this from them I would like to ifdef the relevant parts. Without this there is a handful of functions that are referenced in here that are not defined on many non-mmu architectures. They could be defined out of course, as an alternative approach. 
Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 73 ++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f605e8d0eed3..5f0d797d33fd 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_GENERIC_PGTABLE_H #ifndef __ASSEMBLY__ +#ifdef CONFIG_MMU #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* @@ -132,41 +133,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif -/* - * A facility to provide lazy MMU batching. This allows PTE updates and - * page invalidations to be delayed until a call to leave lazy MMU mode - * is issued. Some architectures may benefit from doing this, and it is - * beneficial for both shadow and direct mode hypervisors, which may batch - * the PTE updates which happen during this window. Note that using this - * interface requires that read hazards be removed from the code. A read - * hazard could result in the direct mode hypervisor case, since the actual - * write to the page tables may not yet have taken place, so reads though - * a raw PTE pointer after it has been modified are not guaranteed to be - * up to date. This mode can only be entered and left under the protection of - * the page table locks for all page tables which may be modified. In the UP - * case, this is required so that preemption is disabled, and in the SMP case, - * it must synchronize the delayed page table writes properly on other CPUs. 
- */ -#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() do {} while (0) -#define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_flush_lazy_mmu_mode() do {} while (0) -#endif - -/* - * A facility to provide batching of the reload of page tables with the - * actual context switch code for paravirtualized guests. By convention, - * only one of the lazy modes (CPU, MMU) should be active at any given - * time, entry should never be nested, and entry and exits should always - * be paired. This is for sanity of maintaining and reasoning about the - * kernel code. - */ -#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) -#define arch_flush_lazy_cpu_mode() do {} while (0) -#endif - /* * When walking page tables, get the address of the next boundary, * or the end address of the range if that comes earlier. Although no @@ -233,6 +199,43 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd) } return 0; } +#endif /* CONFIG_MMU */ + +/* + * A facility to provide lazy MMU batching. This allows PTE updates and + * page invalidations to be delayed until a call to leave lazy MMU mode + * is issued. Some architectures may benefit from doing this, and it is + * beneficial for both shadow and direct mode hypervisors, which may batch + * the PTE updates which happen during this window. Note that using this + * interface requires that read hazards be removed from the code. A read + * hazard could result in the direct mode hypervisor case, since the actual + * write to the page tables may not yet have taken place, so reads though + * a raw PTE pointer after it has been modified are not guaranteed to be + * up to date. This mode can only be entered and left under the protection of + * the page table locks for all page tables which may be modified. 
In the UP + * case, this is required so that preemption is disabled, and in the SMP case, + * it must synchronize the delayed page table writes properly on other CPUs. + */ +#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE +#define arch_enter_lazy_mmu_mode() do {} while (0) +#define arch_leave_lazy_mmu_mode() do {} while (0) +#define arch_flush_lazy_mmu_mode() do {} while (0) +#endif + +/* + * A facility to provide batching of the reload of page tables with the + * actual context switch code for paravirtualized guests. By convention, + * only one of the lazy modes (CPU, MMU) should be active at any given + * time, entry should never be nested, and entry and exits should always + * be paired. This is for sanity of maintaining and reasoning about the + * kernel code. + */ +#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE +#define arch_enter_lazy_cpu_mode() do {} while (0) +#define arch_leave_lazy_cpu_mode() do {} while (0) +#define arch_flush_lazy_cpu_mode() do {} while (0) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_GENERIC_PGTABLE_H */ -- cgit v1.2.3 From 3f3f7b74a7749c3a669ca146270c07568b548665 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 10 Aug 2007 22:31:01 +0200 Subject: x86_64: Don't mark __exitcall as __cold gcc currently doesn't support attributes on types, so we can't use it on function pointers. This avoids some warnings on a gcc 4.3 build.
Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/init.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index 1a4a283d19a9..74b1f43bf982 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -43,7 +43,7 @@ #define __init __attribute__ ((__section__ (".init.text"))) __cold #define __initdata __attribute__ ((__section__ (".init.data"))) #define __exitdata __attribute__ ((__section__(".exit.data"))) -#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) __cold +#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) /* modpost check for section mismatches during the kernel build. * A section mismatch happens when there are references from a -- cgit v1.2.3 From ab144f5ec64c42218a555ec1dbde6b60cf2982d6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 10 Aug 2007 22:31:03 +0200 Subject: i386: Make patching more robust, fix paravirt issue Commit 19d36ccdc34f5ed444f8a6af0cbfdb6790eb1177 "x86: Fix alternatives and kprobes to remap write-protected kernel text" uses code which is being patched for patching. In particular, paravirt_ops does patching in two stages: first it calls paravirt_ops.patch, then it fills any remaining instructions with nop_out(). nop_out calls text_poke() which calls lookup_address() which calls pgd_val() (aka paravirt_ops.pgd_val): that call site is one of the places we patch. If we always do patching as one single call to text_poke(), we only need make sure we're not patching the memcpy in text_poke itself. This means the prototype to paravirt_ops.patch needs to change, to marshal the new code into a buffer rather than patching in place as it does now. It also means all patching goes through text_poke(), which is known to be safe (apply_alternatives is also changed to make a single patch). AK: fix compilation on x86-64 (bad rusty!) 
AK: fix boot on x86-64 (sigh) AK: merged with other patches Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/alternative.c | 33 +++++++++++++++++---------- arch/i386/kernel/paravirt.c | 52 +++++++++++++++++++++--------------------- arch/i386/kernel/vmi.c | 35 ++++++++++++++++------------ arch/i386/xen/enlighten.c | 12 ++++++---- drivers/lguest/lguest.c | 9 ++++---- include/asm-i386/paravirt.h | 16 ++++++++----- 6 files changed, 90 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index c85598acb8fd..27a6b0c9a7cc 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -11,6 +11,8 @@ #include #include +#define MAX_PATCH_LEN (255-1) + #ifdef CONFIG_HOTPLUG_CPU static int smp_alt_once; @@ -148,7 +150,8 @@ static unsigned char** find_nop_table(void) #endif /* CONFIG_X86_64 */ -static void nop_out(void *insns, unsigned int len) +/* Use this to add nops to a buffer, then text_poke the whole buffer. */ +static void add_nops(void *insns, unsigned int len) { unsigned char **noptable = find_nop_table(); @@ -156,7 +159,7 @@ static void nop_out(void *insns, unsigned int len) unsigned int noplen = len; if (noplen > ASM_NOP_MAX) noplen = ASM_NOP_MAX; - text_poke(insns, noptable[noplen], noplen); + memcpy(insns, noptable[noplen], noplen); insns += noplen; len -= noplen; } @@ -174,15 +177,15 @@ extern u8 *__smp_locks[], *__smp_locks_end[]; void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { struct alt_instr *a; - u8 *instr; - int diff; + char insnbuf[MAX_PATCH_LEN]; DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { + u8 *instr = a->instr; BUG_ON(a->replacementlen > a->instrlen); + BUG_ON(a->instrlen > sizeof(insnbuf)); if (!boot_cpu_has(a->cpuid)) continue; - instr = a->instr; #ifdef CONFIG_X86_64 /* vsyscall code is not mapped yet. resolve it manually. 
*/ if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { @@ -191,9 +194,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) __FUNCTION__, a->instr, instr); } #endif - memcpy(instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - nop_out(instr + a->replacementlen, diff); + memcpy(insnbuf, a->replacement, a->replacementlen); + add_nops(insnbuf + a->replacementlen, + a->instrlen - a->replacementlen); + text_poke(instr, insnbuf, a->instrlen); } } @@ -215,16 +219,18 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { u8 **ptr; + char insn[1]; if (noreplace_smp) return; + add_nops(insn, 1); for (ptr = start; ptr < end; ptr++) { if (*ptr < text) continue; if (*ptr > text_end) continue; - nop_out(*ptr, 1); + text_poke(*ptr, insn, 1); }; } @@ -351,6 +357,7 @@ void apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *end) { struct paravirt_patch_site *p; + char insnbuf[MAX_PATCH_LEN]; if (noreplace_paravirt) return; @@ -358,13 +365,15 @@ void apply_paravirt(struct paravirt_patch_site *start, for (p = start; p < end; p++) { unsigned int used; - used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, - p->len); + BUG_ON(p->len > MAX_PATCH_LEN); + used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, + (unsigned long)p->instr, p->len); BUG_ON(used > p->len); /* Pad the rest with nops */ - nop_out(p->instr + used, p->len - used); + add_nops(insnbuf + used, p->len - used); + text_poke(p->instr, insnbuf, p->len); } } extern struct paravirt_patch_site __start_parainstructions[], diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index ea962c0667d5..739cfb207dd7 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -69,7 +69,8 @@ DEF_NATIVE(read_tsc, "rdtsc"); DEF_NATIVE(ud2a, "ud2a"); -static unsigned native_patch(u8 
type, u16 clobbers, void *insns, unsigned len) +static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len) { const unsigned char *start, *end; unsigned ret; @@ -90,7 +91,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) #undef SITE patch_site: - ret = paravirt_patch_insns(insns, len, start, end); + ret = paravirt_patch_insns(ibuf, len, start, end); break; case PARAVIRT_PATCH(make_pgd): @@ -107,7 +108,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) break; default: - ret = paravirt_patch_default(type, clobbers, insns, len); + ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; } @@ -129,68 +130,67 @@ struct branch { u32 delta; } __attribute__((packed)); -unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, - void *site, u16 site_clobbers, +unsigned paravirt_patch_call(void *insnbuf, + const void *target, u16 tgt_clobbers, + unsigned long addr, u16 site_clobbers, unsigned len) { - unsigned char *call = site; - unsigned long delta = (unsigned long)target - (unsigned long)(call+5); - struct branch b; + struct branch *b = insnbuf; + unsigned long delta = (unsigned long)target - (addr+5); if (tgt_clobbers & ~site_clobbers) return len; /* target would clobber too much for this site */ if (len < 5) return len; /* call too long for patch site */ - b.opcode = 0xe8; /* call */ - b.delta = delta; - BUILD_BUG_ON(sizeof(b) != 5); - text_poke(call, (unsigned char *)&b, 5); + b->opcode = 0xe8; /* call */ + b->delta = delta; + BUILD_BUG_ON(sizeof(*b) != 5); return 5; } -unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) +unsigned paravirt_patch_jmp(const void *target, void *insnbuf, + unsigned long addr, unsigned len) { - unsigned char *jmp = site; - unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5); - struct branch b; + struct branch *b = insnbuf; + unsigned long delta = (unsigned long)target - (addr+5); if (len 
< 5) return len; /* call too long for patch site */ - b.opcode = 0xe9; /* jmp */ - b.delta = delta; - text_poke(jmp, (unsigned char *)&b, 5); + b->opcode = 0xe9; /* jmp */ + b->delta = delta; return 5; } -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len) +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, + unsigned long addr, unsigned len) { void *opfunc = *((void **)&paravirt_ops + type); unsigned ret; if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a); + ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); else if (opfunc == paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); else if (type == PARAVIRT_PATCH(iret) || type == PARAVIRT_PATCH(irq_enable_sysexit)) /* If operation requires a jmp, then jmp */ - ret = paravirt_patch_jmp(opfunc, site, len); + ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); else /* Otherwise call the function; assume target could clobber any caller-save reg */ - ret = paravirt_patch_call(opfunc, CLBR_ANY, - site, clobbers, len); + ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, + addr, clobbers, len); return ret; } -unsigned paravirt_patch_insns(void *site, unsigned len, +unsigned paravirt_patch_insns(void *insnbuf, unsigned len, const char *start, const char *end) { unsigned insn_len = end - start; @@ -198,7 +198,7 @@ unsigned paravirt_patch_insns(void *site, unsigned len, if (insn_len > len || start == NULL) insn_len = len; else - memcpy(site, start, insn_len); + memcpy(insnbuf, start, insn_len); return insn_len; } diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index 72042bb7ec94..18673e0f193b 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c @@ -87,12 +87,14 @@ struct vmi_timer_ops vmi_timer_ops; #define IRQ_PATCH_INT_MASK 0 #define IRQ_PATCH_DISABLE 5 -static inline void patch_offset(unsigned char *eip,
unsigned char *dest) +static inline void patch_offset(void *insnbuf, + unsigned long eip, unsigned long dest) { - *(unsigned long *)(eip+1) = dest-eip-5; + *(unsigned long *)(insnbuf+1) = dest-eip-5; } -static unsigned patch_internal(int call, unsigned len, void *insns) +static unsigned patch_internal(int call, unsigned len, void *insnbuf, + unsigned long eip) { u64 reloc; struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; @@ -100,14 +102,14 @@ static unsigned patch_internal(int call, unsigned len, void *insns) switch(rel->type) { case VMI_RELOCATION_CALL_REL: BUG_ON(len < 5); - *(char *)insns = MNEM_CALL; - patch_offset(insns, rel->eip); + *(char *)insnbuf = MNEM_CALL; + patch_offset(insnbuf, eip, (unsigned long)rel->eip); return 5; case VMI_RELOCATION_JUMP_REL: BUG_ON(len < 5); - *(char *)insns = MNEM_JMP; - patch_offset(insns, rel->eip); + *(char *)insnbuf = MNEM_JMP; + patch_offset(insnbuf, eip, (unsigned long)rel->eip); return 5; case VMI_RELOCATION_NOP: @@ -128,21 +130,26 @@ static unsigned patch_internal(int call, unsigned len, void *insns) * Apply patch if appropriate, return length of new instruction * sequence. The callee does nop padding for us. 
*/ -static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) +static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, + unsigned long eip, unsigned len) { switch (type) { case PARAVIRT_PATCH(irq_disable): - return patch_internal(VMI_CALL_DisableInterrupts, len, insns); + return patch_internal(VMI_CALL_DisableInterrupts, len, + insns, eip); case PARAVIRT_PATCH(irq_enable): - return patch_internal(VMI_CALL_EnableInterrupts, len, insns); + return patch_internal(VMI_CALL_EnableInterrupts, len, + insns, eip); case PARAVIRT_PATCH(restore_fl): - return patch_internal(VMI_CALL_SetInterruptMask, len, insns); + return patch_internal(VMI_CALL_SetInterruptMask, len, + insns, eip); case PARAVIRT_PATCH(save_fl): - return patch_internal(VMI_CALL_GetInterruptMask, len, insns); + return patch_internal(VMI_CALL_GetInterruptMask, len, + insns, eip); case PARAVIRT_PATCH(iret): - return patch_internal(VMI_CALL_IRET, len, insns); + return patch_internal(VMI_CALL_IRET, len, insns, eip); case PARAVIRT_PATCH(irq_enable_sysexit): - return patch_internal(VMI_CALL_SYSEXIT, len, insns); + return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); default: break; } diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c index 9a8c1181c001..f0c37511d8da 100644 --- a/arch/i386/xen/enlighten.c +++ b/arch/i386/xen/enlighten.c @@ -842,7 +842,8 @@ void __init xen_setup_vcpu_info_placement(void) } } -static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len) +static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, + unsigned long addr, unsigned len) { char *start, *end, *reloc; unsigned ret; @@ -869,7 +870,7 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len) if (start == NULL || (end-start) > len) goto default_patch; - ret = paravirt_patch_insns(insns, len, start, end); + ret = paravirt_patch_insns(insnbuf, len, start, end); /* Note: because reloc is assigned from something that appears to be an array, gcc assumes 
it's non-null, @@ -877,8 +878,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len) end. */ if (reloc > start && reloc < end) { int reloc_off = reloc - start; - long *relocp = (long *)(insns + reloc_off); - long delta = start - (char *)insns; + long *relocp = (long *)(insnbuf + reloc_off); + long delta = start - (char *)addr; *relocp += delta; } @@ -886,7 +887,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len) default_patch: default: - ret = paravirt_patch_default(type, clobbers, insns, len); + ret = paravirt_patch_default(type, clobbers, insnbuf, + addr, len); break; } diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 524beea7fb19..6e135ac0834f 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c @@ -936,23 +936,24 @@ static const struct lguest_insns /* Now our patch routine is fairly simple (based on the native one in * paravirt.c). If we have a replacement, we copy it in and return how much of * the available space we used. */ -static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) +static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, + unsigned long addr, unsigned len) { unsigned int insn_len; /* Don't do anything special if we don't have a replacement */ if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) - return paravirt_patch_default(type, clobber, insns, len); + return paravirt_patch_default(type, clobber, ibuf, addr, len); insn_len = lguest_insns[type].end - lguest_insns[type].start; /* Similarly if we can't fit replacement (shouldn't happen, but let's * be thorough). */ if (len < insn_len) - return paravirt_patch_default(type, clobber, insns, len); + return paravirt_patch_default(type, clobber, ibuf, addr, len); /* Copy in our instructions. 
*/ - memcpy(insns, lguest_insns[type].start, insn_len); + memcpy(ibuf, lguest_insns[type].start, insn_len); return insn_len; } diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 7df88be2dd9e..9fa3fa9e62d1 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -47,7 +47,8 @@ struct paravirt_ops * The patch function should return the number of bytes of code * generated, as we nop pad the rest in generic code. */ - unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); + unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, + unsigned long addr, unsigned len); /* Basic arch-specific setup */ void (*arch_setup)(void); @@ -253,13 +254,16 @@ extern struct paravirt_ops paravirt_ops; unsigned paravirt_patch_nop(void); unsigned paravirt_patch_ignore(unsigned len); -unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, - void *site, u16 site_clobbers, +unsigned paravirt_patch_call(void *insnbuf, + const void *target, u16 tgt_clobbers, + unsigned long addr, u16 site_clobbers, unsigned len); -unsigned paravirt_patch_jmp(void *target, void *site, unsigned len); -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len); +unsigned paravirt_patch_jmp(const void *target, void *insnbuf, + unsigned long addr, unsigned len); +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, + unsigned long addr, unsigned len); -unsigned paravirt_patch_insns(void *site, unsigned len, +unsigned paravirt_patch_insns(void *insnbuf, unsigned len, const char *start, const char *end); int paravirt_disable_iospace(void); -- cgit v1.2.3 From d3f7eae182b04997be19343a23f7009170f4f7a5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 10 Aug 2007 22:31:07 +0200 Subject: i386: Use global flag to disable broken local apic timer on AMD CPUs. The Averatec 2370 and some other Turion laptop BIOS seems to program the ENABLE_C1E MSR inconsistently between cores. 
This confuses the lapic use heuristics because when C1E is enabled anywhere it seems to affect the complete chip. Use a global flag instead of a per cpu flag to handle this. If any CPU has C1E enabled, disable lapic use. Thanks to Cal Peake for debugging. Cc: tglx@linutronix.de Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 10 ++++------ arch/i386/kernel/cpu/amd.c | 7 ++++++- include/asm-i386/apic.h | 2 ++ include/asm-i386/cpufeature.h | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index bfc6cb7df7e7..f9fff29e01a9 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -61,8 +61,9 @@ static int enable_local_apic __initdata = 0; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int local_apic_timer_disabled; +/* Disable local APIC timer from the kernel commandline or via dmi quirk + or using CPU MSR check */ +int local_apic_timer_disabled; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -370,12 +371,9 @@ void __init setup_boot_APIC_clock(void) long delta, deltapm; int pm_referenced = 0; - if (boot_cpu_has(X86_FEATURE_LAPIC_TIMER_BROKEN)) - local_apic_timer_disabled = 1; - /* * The local apic timer can be disabled via the kernel - * commandline or from the test above. Register the lapic + * commandline or from the CPU detection code. Register the lapic * timer as a dummy clock event source on SMP systems, so the * broadcast mechanism is used. On UP systems simply ignore it.
*/ diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index c7ba455d5ac7..dcf6bbb1c7c0 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "cpu.h" @@ -22,6 +23,7 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); +#ifdef CONFIG_X86_LOCAL_APIC #define ENABLE_C1E_MASK 0x18000000 #define CPUID_PROCESSOR_SIGNATURE 1 #define CPUID_XFAM 0x0ff00000 @@ -52,6 +54,7 @@ static __cpuinit int amd_apic_timer_broken(void) } return 0; } +#endif int force_mwait __cpuinitdata; @@ -282,8 +285,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) num_cache_leaves = 3; } +#ifdef CONFIG_X86_LOCAL_APIC if (amd_apic_timer_broken()) - set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability); + local_apic_timer_disabled = 1; +#endif if (c->x86 == 0x10 && !force_mwait) clear_bit(X86_FEATURE_MWAIT, c->x86_capability); diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index 1e8f6f252dd3..4091b33dcb10 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -116,6 +116,8 @@ extern void enable_NMI_through_LVT0 (void * dummy); extern int timer_over_8254; extern int local_apic_timer_c2_ok; +extern int local_apic_timer_disabled; + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index c961c03cf1e2..7b3aa28ebc6e 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -79,7 +79,7 @@ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 */ +/* 14 free */ #define X86_FEATURE_SYNC_RDTSC (3*32+15) /* RDTSC synchronizes the CPU */ #define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU 
*/ -- cgit v1.2.3 From 6707de00fdec3e3225192fe3dcd21323a8936b1f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 12 Aug 2007 18:08:19 +0200 Subject: sched: make global code static This patch makes the following needlessly global code static: - arch_reinit_sched_domains() - struct attr_sched_mc_power_savings - struct attr_sched_smt_power_savings Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 2 -- kernel/sched.c | 46 +++++++++++++++++++++++----------------------- 2 files changed, 23 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c2236bbff412..1d5ded0836ee 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -41,8 +41,6 @@ extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr); extern int cpu_add_sysdev_attr_group(struct attribute_group *attrs); extern void cpu_remove_sysdev_attr_group(struct attribute_group *attrs); -extern struct sysdev_attribute attr_sched_mc_power_savings; -extern struct sysdev_attribute attr_sched_smt_power_savings; extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls); #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/sched.c b/kernel/sched.c index 6247e4a8350f..c02659f1bd09 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6328,7 +6328,7 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) } #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -int arch_reinit_sched_domains(void) +static int arch_reinit_sched_domains(void) { int err; @@ -6357,24 +6357,6 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) return ret ? 
ret : count; } -int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) -{ - int err = 0; - -#ifdef CONFIG_SCHED_SMT - if (smt_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_smt_power_savings.attr); -#endif -#ifdef CONFIG_SCHED_MC - if (!err && mc_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_mc_power_savings.attr); -#endif - return err; -} -#endif - #ifdef CONFIG_SCHED_MC static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) { @@ -6385,8 +6367,8 @@ static ssize_t sched_mc_power_savings_store(struct sys_device *dev, { return sched_power_savings_store(buf, count, 0); } -SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, - sched_mc_power_savings_store); +static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, + sched_mc_power_savings_store); #endif #ifdef CONFIG_SCHED_SMT @@ -6399,8 +6381,26 @@ static ssize_t sched_smt_power_savings_store(struct sys_device *dev, { return sched_power_savings_store(buf, count, 1); } -SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, - sched_smt_power_savings_store); +static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, + sched_smt_power_savings_store); +#endif + +int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) +{ + int err = 0; + +#ifdef CONFIG_SCHED_SMT + if (smt_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_smt_power_savings.attr); +#endif +#ifdef CONFIG_SCHED_MC + if (!err && mc_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_mc_power_savings.attr); +#endif + return err; +} #endif /* -- cgit v1.2.3 From 6cba986298105a87a09d65baa2658fb5111459c6 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Fri, 10 Aug 2007 23:42:59 +0200 Subject: [IA64] Use atomic64_read to read an atomic64_t. 
The routines ia64_atomic64_{add,sub} mistakenly use atomic_read() to grab the old value instead of using atomic64_read(). Signed-off-by: Andreas Schwab Signed-off-by: Tony Luck --- include/asm-ia64/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h index 1fc3b83325da..50c2b83fd5a0 100644 --- a/include/asm-ia64/atomic.h +++ b/include/asm-ia64/atomic.h @@ -55,7 +55,7 @@ ia64_atomic64_add (__s64 i, atomic64_t *v) do { CMPXCHG_BUGCHECK(v); - old = atomic_read(v); + old = atomic64_read(v); new = old + i; } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); return new; @@ -83,7 +83,7 @@ ia64_atomic64_sub (__s64 i, atomic64_t *v) do { CMPXCHG_BUGCHECK(v); - old = atomic_read(v); + old = atomic64_read(v); new = old - i; } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); return new; -- cgit v1.2.3 From 1115200a3df64d6925bc94b404039e7082409af4 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 13 Aug 2007 10:31:26 -0700 Subject: [IA64] SN2 needs platform specific irq_to_vector() function. Add base support for implementing platform_irq_to_vector(), and then use it on SN2. 
Signed-off-by: Kenji Kaneshige Acked-by: John Keller Signed-off-by: Tony Luck --- arch/ia64/kernel/irq.c | 5 +++++ arch/ia64/sn/kernel/irq.c | 7 +++++++ include/asm-ia64/hw_irq.h | 7 ++++++- include/asm-ia64/machvec.h | 7 +++++++ include/asm-ia64/machvec_init.h | 1 + include/asm-ia64/machvec_sn2.h | 2 ++ 6 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index cc3ee4ef37af..44be1c952b7c 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -33,6 +33,11 @@ void ack_bad_irq(unsigned int irq) } #ifdef CONFIG_IA64_GENERIC +ia64_vector __ia64_irq_to_vector(int irq) +{ + return irq_cfg[irq].vector; +} + unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { return __get_cpu_var(vector_irq)[vec]; diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 7f6d2360a262..bfa43e4c7ef7 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -256,6 +256,13 @@ struct irq_chip irq_type_sn = { .set_affinity = sn_set_affinity_irq }; +ia64_vector sn_irq_to_vector(int irq) +{ + if (irq >= IA64_NUM_VECTORS) + return 0; + return (ia64_vector)irq; +} + unsigned int sn_local_vector_to_irq(u8 vector) { return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector)); diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index efa1b8f7251d..bba5baa3c7fc 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -124,6 +124,11 @@ static inline void ia64_resend_irq(unsigned int vector) extern irq_desc_t irq_desc[NR_IRQS]; #ifndef CONFIG_IA64_GENERIC +static inline ia64_vector __ia64_irq_to_vector(int irq) +{ + return irq_cfg[irq].vector; +} + static inline unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { @@ -145,7 +150,7 @@ __ia64_local_vector_to_irq (ia64_vector vec) static inline ia64_vector irq_to_vector (int irq) { - return irq_cfg[irq].vector; + return platform_irq_to_vector(irq); } /* diff --git a/include/asm-ia64/machvec.h 
b/include/asm-ia64/machvec.h index 5cf8bf1e805e..c201a2020aa4 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h @@ -30,6 +30,7 @@ typedef void ia64_mv_send_ipi_t (int, int, int, int); typedef void ia64_mv_timer_interrupt_t (int, void *); typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long); typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *); +typedef u8 ia64_mv_irq_to_vector (int); typedef unsigned int ia64_mv_local_vector_to_irq (u8); typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *); typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val, @@ -145,6 +146,7 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # define platform_dma_sync_sg_for_device ia64_mv.dma_sync_sg_for_device # define platform_dma_mapping_error ia64_mv.dma_mapping_error # define platform_dma_supported ia64_mv.dma_supported +# define platform_irq_to_vector ia64_mv.irq_to_vector # define platform_local_vector_to_irq ia64_mv.local_vector_to_irq # define platform_pci_get_legacy_mem ia64_mv.pci_get_legacy_mem # define platform_pci_legacy_read ia64_mv.pci_legacy_read @@ -198,6 +200,7 @@ struct ia64_machine_vector { ia64_mv_dma_sync_sg_for_device *dma_sync_sg_for_device; ia64_mv_dma_mapping_error *dma_mapping_error; ia64_mv_dma_supported *dma_supported; + ia64_mv_irq_to_vector *irq_to_vector; ia64_mv_local_vector_to_irq *local_vector_to_irq; ia64_mv_pci_get_legacy_mem_t *pci_get_legacy_mem; ia64_mv_pci_legacy_read_t *pci_legacy_read; @@ -247,6 +250,7 @@ struct ia64_machine_vector { platform_dma_sync_sg_for_device, \ platform_dma_mapping_error, \ platform_dma_supported, \ + platform_irq_to_vector, \ platform_local_vector_to_irq, \ platform_pci_get_legacy_mem, \ platform_pci_legacy_read, \ @@ -366,6 +370,9 @@ extern ia64_mv_dma_supported swiotlb_dma_supported; #ifndef platform_dma_supported # define platform_dma_supported swiotlb_dma_supported #endif +#ifndef 
platform_irq_to_vector +# define platform_irq_to_vector __ia64_irq_to_vector +#endif #ifndef platform_local_vector_to_irq # define platform_local_vector_to_irq __ia64_local_vector_to_irq #endif diff --git a/include/asm-ia64/machvec_init.h b/include/asm-ia64/machvec_init.h index 2d36f6840f0b..7f21249fba3f 100644 --- a/include/asm-ia64/machvec_init.h +++ b/include/asm-ia64/machvec_init.h @@ -2,6 +2,7 @@ extern ia64_mv_send_ipi_t ia64_send_ipi; extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge; +extern ia64_mv_irq_to_vector __ia64_irq_to_vector; extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq; extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem; extern ia64_mv_pci_legacy_read_t ia64_pci_legacy_read; diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index eaa2fce0fecd..61439a7f5b08 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h @@ -35,6 +35,7 @@ extern ia64_mv_send_ipi_t sn2_send_IPI; extern ia64_mv_timer_interrupt_t sn_timer_interrupt; extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge; extern ia64_mv_tlb_migrate_finish_t sn_tlb_migrate_finish; +extern ia64_mv_irq_to_vector sn_irq_to_vector; extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq; extern ia64_mv_pci_get_legacy_mem_t sn_pci_get_legacy_mem; extern ia64_mv_pci_legacy_read_t sn_pci_legacy_read; @@ -104,6 +105,7 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; #define platform_readw_relaxed __sn_readw_relaxed #define platform_readl_relaxed __sn_readl_relaxed #define platform_readq_relaxed __sn_readq_relaxed +#define platform_irq_to_vector sn_irq_to_vector #define platform_local_vector_to_irq sn_local_vector_to_irq #define platform_pci_get_legacy_mem sn_pci_get_legacy_mem #define platform_pci_legacy_read sn_pci_legacy_read -- cgit v1.2.3 From 7f353bf29e162459f2f1e2ca25e41011fae65241 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Aug 2007 15:47:58 -0700 Subject: [NET]: Share correct feature code between 
bridging and bonding http://bugzilla.kernel.org/show_bug.cgi?id=8797 shows that the bonding driver may produce bogus combinations of the checksum flags and SG/TSO. For example, if you bond devices with NETIF_F_HW_CSUM and NETIF_F_IP_CSUM you'll end up with a bonding device that has neither flag set. If both have TSO then this produces an illegal combination. The bridge device on the other hand has the correct code to deal with this. In fact, the same code can be used for both. So this patch moves that logic into net/core/dev.c and uses it for both bonding and bridging. In the process I've made small adjustments such as only setting GSO_ROBUST if at least one constituent device supports it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 34 +++++++++++++--------------------- include/linux/netdevice.h | 2 ++ net/bridge/br_device.c | 2 +- net/bridge/br_if.c | 36 ++++-------------------------------- net/core/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 070b78d959cc..1afda3230def 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1202,43 +1202,35 @@ static int bond_sethwaddr(struct net_device *bond_dev, return 0; } -#define BOND_INTERSECT_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO) +#define BOND_VLAN_FEATURES \ + (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \ + NETIF_F_HW_VLAN_FILTER) /* * Compute the common dev->feature set available to all slaves. Some - * feature bits are managed elsewhere, so preserve feature bits set on - * master device that are not part of the examined set. + * feature bits are managed elsewhere, so preserve those feature bits + * on the master device. 
*/ static int bond_compute_features(struct bonding *bond) { - unsigned long features = BOND_INTERSECT_FEATURES; struct slave *slave; struct net_device *bond_dev = bond->dev; + unsigned long features = bond_dev->features; unsigned short max_hard_header_len = ETH_HLEN; int i; + features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); + features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | + NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; + bond_for_each_slave(bond, slave, i) { - features &= (slave->dev->features & BOND_INTERSECT_FEATURES); + features = netdev_compute_features(features, + slave->dev->features); if (slave->dev->hard_header_len > max_hard_header_len) max_hard_header_len = slave->dev->hard_header_len; } - if ((features & NETIF_F_SG) && - !(features & NETIF_F_ALL_CSUM)) - features &= ~NETIF_F_SG; - - /* - * features will include NETIF_F_TSO (NETIF_F_UFO) iff all - * slave devices support NETIF_F_TSO (NETIF_F_UFO), which - * implies that all slaves also support scatter-gather - * (NETIF_F_SG), which implies that features also includes - * NETIF_F_SG. 
So no need to check whether we have an - * illegal combination of NETIF_F_{TSO,UFO} and - * !NETIF_F_SG - */ - - features |= (bond_dev->features & ~BOND_INTERSECT_FEATURES); + features |= (bond_dev->features & BOND_VLAN_FEATURES); bond_dev->features = features; bond_dev->hard_header_len = max_hard_header_len; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4a616d73cc25..e679b2751665 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1131,6 +1131,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern void linkwatch_run_queue(void); +extern int netdev_compute_features(unsigned long all, unsigned long one); + static inline int net_gso_ok(int features, int gso_type) { int feature = gso_type << NETIF_F_GSO_SHIFT; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 5e1892d8d874..0eded176ce99 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -179,5 +179,5 @@ void br_dev_setup(struct net_device *dev) dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST; + NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b40dada002bf..749f0e8f541d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -349,43 +349,15 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features, checksum; + unsigned long features; - checksum = br->feature_mask & NETIF_F_ALL_CSUM ? 
NETIF_F_NO_CSUM : 0; - features = br->feature_mask & ~NETIF_F_ALL_CSUM; + features = br->feature_mask; list_for_each_entry(p, &br->port_list, list) { - unsigned long feature = p->dev->features; - - /* if device needs checksumming, downgrade to hw checksumming */ - if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM)) - checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - - /* if device can't do all checksum, downgrade to ipv4/ipv6 */ - if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM)) - checksum ^= NETIF_F_HW_CSUM - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - - if (checksum & NETIF_F_IPV6_CSUM && !(feature & NETIF_F_IPV6_CSUM)) - checksum &= ~NETIF_F_IPV6_CSUM; - - if (!(feature & NETIF_F_IP_CSUM)) - checksum = 0; - - if (feature & NETIF_F_GSO) - feature |= NETIF_F_GSO_SOFTWARE; - feature |= NETIF_F_GSO; - - features &= feature; + features = netdev_compute_features(features, p->dev->features); } - if (!(checksum & NETIF_F_ALL_CSUM)) - features &= ~NETIF_F_SG; - if (!(features & NETIF_F_SG)) - features &= ~NETIF_F_GSO_MASK; - - br->dev->features = features | checksum | NETIF_F_LLTX | - NETIF_F_GSO_ROBUST; + br->dev->features = features; } /* called with RTNL */ diff --git a/net/core/dev.c b/net/core/dev.c index 6cc8a70350ac..a76021c71207 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3993,6 +3993,45 @@ static int __init netdev_dma_register(void) static int __init netdev_dma_register(void) { return -ENODEV; } #endif /* CONFIG_NET_DMA */ +/** + * netdev_compute_feature - compute conjunction of two feature sets + * @all: first feature set + * @one: second feature set + * + * Computes a new feature set after adding a device with feature set + * @one to the master device with current feature set @all. Returns + * the new feature set. 
+ */ +int netdev_compute_features(unsigned long all, unsigned long one) +{ + /* if device needs checksumming, downgrade to hw checksumming */ + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) + all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; + + /* if device can't do all checksum, downgrade to ipv4/ipv6 */ + if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) + all ^= NETIF_F_HW_CSUM + | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + + if (one & NETIF_F_GSO) + one |= NETIF_F_GSO_SOFTWARE; + one |= NETIF_F_GSO; + + /* If even one device supports robust GSO, enable it for all. */ + if (one & NETIF_F_GSO_ROBUST) + all |= NETIF_F_GSO_ROBUST; + + all &= one | NETIF_F_LLTX; + + if (!(all & NETIF_F_ALL_CSUM)) + all &= ~NETIF_F_SG; + if (!(all & NETIF_F_SG)) + all &= ~NETIF_F_GSO_MASK; + + return all; +} +EXPORT_SYMBOL(netdev_compute_features); + /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not -- cgit v1.2.3 From 5221b34edfd05ac35c077e071095cf853325320f Mon Sep 17 00:00:00 2001 From: Mariusz Kozlowski Date: Tue, 31 Jul 2007 23:41:00 +0200 Subject: include/asm-avr32/pgalloc.h: kmalloc + memset conversion to kcalloc Signed-off-by: Mariusz Kozlowski Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/pgalloc.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h index bb82e70cde8d..75248141c613 100644 --- a/include/asm-avr32/pgalloc.h +++ b/include/asm-avr32/pgalloc.h @@ -27,13 +27,7 @@ static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd, */ static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm) { - unsigned int pgd_size = (USER_PTRS_PER_PGD * sizeof(pgd_t)); - pgd_t *pgd = kmalloc(pgd_size, GFP_KERNEL); - - if (pgd) - memset(pgd, 0, pgd_size); - - return pgd; + return kcalloc(USER_PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL); } static inline void pgd_free(pgd_t 
*pgd) -- cgit v1.2.3 From e7f3bac95e0bdfd520e065c4a29aad46190fcc99 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Fri, 3 Aug 2007 13:29:01 +0200 Subject: [AVR32] Simplify pte_alloc_one{,_kernel} There's really no need to retry an allocation with __GFP_REPEAT set. Also, use get_zeroed_page() and __GFP_ZERO to eliminate the extra call to clear_page() afterwards. Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/pgalloc.h | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h index 75248141c613..0e680f47209f 100644 --- a/include/asm-avr32/pgalloc.h +++ b/include/asm-avr32/pgalloc.h @@ -38,18 +38,9 @@ static inline void pgd_free(pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - int count = 0; pte_t *pte; - do { - pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); - if (pte) - clear_page(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); return pte; } @@ -57,18 +48,9 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - int count = 0; struct page *pte; - do { - pte = alloc_pages(GFP_KERNEL, 0); - if (pte) - clear_page(page_address(pte)); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); return pte; } -- cgit v1.2.3 From 6f834197a79d34d65ca44e9c77f2571edbcfb64b Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Wed, 15 Aug 2007 16:12:18 +0200 Subject: [AVR32] Fix bogus pte_page() definition The current definition of pte_page() masks out valid bits from the physical address, causing vmalloc_to_page() to misbehave. 
This may lead to everything from mmap() silently accessing the wrong data to "invalid pte" errors dumped by the kernel. Also remove the now-unused definition of PTE_PHYS_MASK. Thanks to Matteo Vit for discovering this bug. Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/pgtable.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h index c07bdd10b891..018f6e2a0242 100644 --- a/include/asm-avr32/pgtable.h +++ b/include/asm-avr32/pgtable.h @@ -32,8 +32,6 @@ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) #define FIRST_USER_ADDRESS 0 -#define PTE_PHYS_MASK 0x1ffff000 - #ifndef __ASSEMBLY__ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void paging_init(void); @@ -265,7 +263,7 @@ static inline pte_t pte_mkyoung(pte_t pte) * trivial. */ #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) -#define pte_page(x) phys_to_page(pte_val(x) & PTE_PHYS_MASK) +#define pte_page(x) (pfn_to_page(pte_pfn(x))) /* * Mark the prot value as uncacheable and unbufferable -- cgit v1.2.3 From cca67164bbdb083df9adb6480822518b0f4b358f Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Mon, 13 Aug 2007 16:24:01 +0200 Subject: [AVR32] Define mmiowb() Add empty definition of mmiowb() since some drivers need it. Uncached writes are strongly ordered on AVR32. They may be delayed if the dcache is busy doing a writeback, but AFAICT that's not what this macro is supposed to deal with, at least on UP systems. We might have to revisit this definition when a SMP-capable AVR32 CPU comes along, depending on how the busses and cache coherency stuff end up being implemented. 
Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/io.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-avr32/io.h b/include/asm-avr32/io.h index e30d4b3bd836..64bb92bb6773 100644 --- a/include/asm-avr32/io.h +++ b/include/asm-avr32/io.h @@ -255,6 +255,8 @@ static inline void memset_io(volatile void __iomem *addr, unsigned char val, memset((void __force *)addr, val, count); } +#define mmiowb() + #define IO_SPACE_LIMIT 0xffffffff extern void __iomem *__ioremap(unsigned long offset, size_t size, -- cgit v1.2.3 From 8b224b813aad0231af62dc75d056aae83c9d4d12 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 13:56:39 -0700 Subject: [SPARC64]: Create a HWCAP_SPARC_N2 and report it to userspace on Niagara-2. Signed-off-by: David S. Miller --- include/asm-sparc64/elf.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/elf.h b/include/asm-sparc64/elf.h index 303d85e2f82e..8653e8665009 100644 --- a/include/asm-sparc64/elf.h +++ b/include/asm-sparc64/elf.h @@ -70,6 +70,7 @@ #define HWCAP_SPARC_V9 16 #define HWCAP_SPARC_ULTRA3 32 #define HWCAP_SPARC_BLKINIT 64 +#define HWCAP_SPARC_N2 128 /* * These are used to set parameters in the core dumps. @@ -155,8 +156,13 @@ static inline unsigned int sparc64_elf_hwcap(void) if (tlb_type == cheetah || tlb_type == cheetah_plus) cap |= HWCAP_SPARC_ULTRA3; - else if (tlb_type == hypervisor) - cap |= HWCAP_SPARC_BLKINIT; + else if (tlb_type == hypervisor) { + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA2) + cap |= HWCAP_SPARC_BLKINIT; + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2) + cap |= HWCAP_SPARC_N2; + } return cap; } -- cgit v1.2.3 From 405849610fd96b4f34cd1875c4c033228fea6c0f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 22:59:49 -0700 Subject: [MATH-EMU]: Fix underflow exception reporting. The underflow exception cases were wrong. 
This is one weird area of IEEE 754 handling in that the underflow behavior changes based upon whether underflow is enabled in the trap enable mask of the FPU control register. As a specific case the Sparc V9 manual gives us the following description: -------------------- If UFM = 0: Underflow occurs if a nonzero result is tiny and a loss of accuracy occurs. Tininess may be detected before or after rounding. Loss of accuracy may be either a denormalization loss or an inexact result. If UFM = 1: Underflow occurs if a nonzero result is tiny. Tininess may be detected before or after rounding. -------------------- What this amounts to in the packing case is if we go subnormal, we set underflow if any of the following are true: 1) rounding sets inexact 2) we ended up rounding back up to normal (this is the case where we set the exponent to 1 and set the fraction to zero), this should set inexact too 3) underflow is set in FPU control register trap-enable mask The initially discovered example was "DBL_MIN / 16.0" which incorrectly generated an underflow. It should not, unless underflow is set in the trap-enable mask of the FPU csr. Another example, "0x0.0000000000001p-1022 / 16.0", should signal both inexact and underflow. The cpu implementations and IEEE 754 literature are very clear about this. This is case #2 above. However, if underflow is set in the trap enable mask, only underflow should be set and reported as a trap. That is handled properly by the prioritization logic in arch/sparc{,64}/math-emu/math.c:record_exception(). Based upon a report and test case from Jakub Jelinek. Signed-off-by: David S.
Miller --- include/asm-sparc/sfp-machine.h | 6 ++++++ include/asm-sparc64/sfp-machine.h | 2 ++ include/math-emu/op-common.h | 5 ++++- include/math-emu/soft-fp.h | 7 +++++++ 4 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-sparc/sfp-machine.h b/include/asm-sparc/sfp-machine.h index ecfc86a4a725..266a42b8f99f 100644 --- a/include/asm-sparc/sfp-machine.h +++ b/include/asm-sparc/sfp-machine.h @@ -203,4 +203,10 @@ extern struct task_struct *last_task_used_math; #define FP_INHIBIT_RESULTS ((last_task_used_math->thread.fsr >> 23) & _fex) #endif +#ifdef CONFIG_SMP +#define FP_TRAPPING_EXCEPTIONS ((current->thread.fsr >> 23) & 0x1f) +#else +#define FP_TRAPPING_EXCEPTIONS ((last_task_used_math->thread.fsr >> 23) & 0x1f) +#endif + #endif diff --git a/include/asm-sparc64/sfp-machine.h b/include/asm-sparc64/sfp-machine.h index 89d42431efb5..c9331b02d9c8 100644 --- a/include/asm-sparc64/sfp-machine.h +++ b/include/asm-sparc64/sfp-machine.h @@ -88,4 +88,6 @@ #define FP_INHIBIT_RESULTS ((current_thread_info()->xfsr[0] >> 23) & _fex) +#define FP_TRAPPING_EXCEPTIONS ((current_thread_info()->xfsr[0] >> 23) & 0x1f) + #endif diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h index 93780abd01bc..bb46e7645d53 100644 --- a/include/math-emu/op-common.h +++ b/include/math-emu/op-common.h @@ -145,13 +145,16 @@ do { \ { \ X##_e = 1; \ _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + FP_SET_EXCEPTION(FP_EX_INEXACT); \ } \ else \ { \ X##_e = 0; \ _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \ - FP_SET_EXCEPTION(FP_EX_UNDERFLOW); \ } \ + if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT) || \ + (FP_TRAPPING_EXCEPTIONS & FP_EX_UNDERFLOW)) \ + FP_SET_EXCEPTION(FP_EX_UNDERFLOW); \ } \ else \ { \ diff --git a/include/math-emu/soft-fp.h b/include/math-emu/soft-fp.h index d02eb64a865b..a0721ef5c2f9 100644 --- a/include/math-emu/soft-fp.h +++ b/include/math-emu/soft-fp.h @@ -97,12 +97,19 @@ #define FP_INHIBIT_RESULTS 0 #endif +#ifndef FP_TRAPPING_EXCEPTIONS 
+#define FP_TRAPPING_EXCPETIONS 0 +#endif + #define FP_SET_EXCEPTION(ex) \ _fex |= (ex) #define FP_UNSET_EXCEPTION(ex) \ _fex &= ~(ex) +#define FP_CUR_EXCEPTIONS \ + (_fex) + #define FP_CLEAR_EXCEPTIONS \ _fex = 0 -- cgit v1.2.3 From 86d7a9a9c4775f864e6bc5f5da66ef9ea3715734 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Thu, 2 Aug 2007 15:37:15 -0500 Subject: [POWERPC] Fix FSL BookE machine check reporting Reserved MCSR bits on FSL BookE parts may have spurious values when mcheck occurs. Mask these off when printing the MCSR to avoid confusion. Also, get rid of the MCSR_GL_CI bit defined for e500 - this bit doesn't actually have any meaning. Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala --- arch/powerpc/kernel/traps.c | 4 +--- include/asm-powerpc/reg_booke.h | 12 +++++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2bb1cb911783..d8502e377518 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -299,7 +299,7 @@ static inline int check_io_access(struct pt_regs *regs) #ifndef CONFIG_FSL_BOOKE #define get_mc_reason(regs) ((regs)->dsisr) #else -#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) +#define get_mc_reason(regs) (mfspr(SPRN_MCSR) & MCSR_MASK) #endif #define REASON_FP ESR_FP #define REASON_ILLEGAL (ESR_PIL | ESR_PUO) @@ -414,8 +414,6 @@ void machine_check_exception(struct pt_regs *regs) printk("Data Cache Push Parity Error\n"); if (reason & MCSR_DCPERR) printk("Data Cache Parity Error\n"); - if (reason & MCSR_GL_CI) - printk("Guarded Load or Cache-Inhibited stwcx.\n"); if (reason & MCSR_BUS_IAERR) printk("Bus - Instruction Address Error\n"); if (reason & MCSR_BUS_RAERR) diff --git a/include/asm-powerpc/reg_booke.h b/include/asm-powerpc/reg_booke.h index 064405c207bf..8fdc2b47afb9 100644 --- a/include/asm-powerpc/reg_booke.h +++ b/include/asm-powerpc/reg_booke.h @@ -223,7 +223,6 @@ #define MCSR_ICPERR 0x40000000UL /* 
I-Cache Parity Error */ #define MCSR_DCP_PERR 0x20000000UL /* D-Cache Push Parity Error */ #define MCSR_DCPERR 0x10000000UL /* D-Cache Parity Error */ -#define MCSR_GL_CI 0x00010000UL /* Guarded Load or Cache-Inhibited stwcx. */ #define MCSR_BUS_IAERR 0x00000080UL /* Instruction Address Error */ #define MCSR_BUS_RAERR 0x00000040UL /* Read Address Error */ #define MCSR_BUS_WAERR 0x00000020UL /* Write Address Error */ @@ -232,6 +231,12 @@ #define MCSR_BUS_WBERR 0x00000004UL /* Write Data Bus Error */ #define MCSR_BUS_IPERR 0x00000002UL /* Instruction parity Error */ #define MCSR_BUS_RPERR 0x00000001UL /* Read parity Error */ + +/* e500 parts may set unused bits in MCSR; mask these off */ +#define MCSR_MASK (MCSR_MCP | MCSR_ICPERR | MCSR_DCP_PERR | \ + MCSR_DCPERR | MCSR_BUS_IAERR | MCSR_BUS_RAERR | \ + MCSR_BUS_WAERR | MCSR_BUS_IBERR | MCSR_BUS_RBERR | \ + MCSR_BUS_WBERR | MCSR_BUS_IPERR | MCSR_BUS_RPERR) #endif #ifdef CONFIG_E200 #define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */ @@ -243,6 +248,11 @@ #define MCSR_BUS_DRERR 0x00000008UL /* Read Bus Error on data load */ #define MCSR_BUS_WRERR 0x00000004UL /* Write Bus Error on buffered store or cache line push */ + +/* e200 parts may set unused bits in MCSR; mask these off */ +#define MCSR_MASK (MCSR_MCP | MCSR_CP_PERR | MCSR_CPERR | \ + MCSR_EXCP_ERR | MCSR_BUS_IRERR | MCSR_BUS_DRERR | \ + MCSR_BUS_WRERR) #endif /* Bit definitions for the DBSR. */ -- cgit v1.2.3 From 118142080a75fc1ce599c73b7894a71b4813828e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 16 Aug 2007 16:27:15 +0200 Subject: Cross-compilation between e.g. i386 -> 64bit could break -> work around it Adrian Bunk: scripts/mod/file2alias.c is compiled with HOSTCC and ensures that kernel_ulong_t is correct, but it can't cope with different padding on different architectures. 
Signed-off-by: Thomas Renninger Signed-off-by: Tony Luck --- include/linux/mod_devicetable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 2ada8ee316b3..4dc5fa8be781 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -159,7 +159,8 @@ struct ap_device_id { #define AP_DEVICE_ID_MATCH_DEVICE_TYPE 0x01 -#define ACPI_ID_LEN 9 +#define ACPI_ID_LEN 16 /* only 9 bytes needed here, 16 bytes are used */ + /* to workaround crosscompile issues */ struct acpi_device_id { __u8 id[ACPI_ID_LEN]; -- cgit v1.2.3 From 62be90012c507b2bf1047eb2eaa1e9151f7172fe Mon Sep 17 00:00:00 2001 From: Satyam Sharma Date: Thu, 16 Aug 2007 06:09:25 +0530 Subject: i386: Fix a couple busy loops in mach_wakecpu.h:wait_for_init_deassert() Use cpu_relax() in the busy loops, as atomic_read() doesn't automatically imply volatility for i386 and x86_64. x86_64 doesn't have this issue because it open-codes the while loop in smpboot.c:smp_callin() itself that already uses cpu_relax(). For i386, however, smpboot.c:smp_callin() calls wait_for_init_deassert() which is buggy for mach-default and mach-es7000 cases. [ I test-built a kernel -- smp_callin() itself got inlined in its only callsite, smpboot.c:start_secondary() -- and the relevant piece of code disassembles to the following: 0xc1019704 : mov 0xc144c4c8,%eax 0xc1019709 : test %eax,%eax 0xc101970b : je 0xc1019709 init_deasserted (at 0xc144c4c8) gets fetched into %eax only once and then we loop over the test of the stale value in the register only, so these look like real bugs to me. With the fix below, this becomes: 0xc1019706 : pause 0xc1019708 : cmpl $0x0,0xc144c4c8 0xc101970f : je 0xc1019706 which looks nice and healthy. ] Thanks to Heiko Carstens for noticing this. 
Signed-off-by: Satyam Sharma Cc: Heiko Carstens Signed-off-by: Linus Torvalds --- include/asm-i386/mach-default/mach_wakecpu.h | 3 ++- include/asm-i386/mach-es7000/mach_wakecpu.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/mach-default/mach_wakecpu.h b/include/asm-i386/mach-default/mach_wakecpu.h index 673b85c9b273..3ebb17893aa5 100644 --- a/include/asm-i386/mach-default/mach_wakecpu.h +++ b/include/asm-i386/mach-default/mach_wakecpu.h @@ -15,7 +15,8 @@ static inline void wait_for_init_deassert(atomic_t *deassert) { - while (!atomic_read(deassert)); + while (!atomic_read(deassert)) + cpu_relax(); return; } diff --git a/include/asm-i386/mach-es7000/mach_wakecpu.h b/include/asm-i386/mach-es7000/mach_wakecpu.h index efc903b73486..84ff58314501 100644 --- a/include/asm-i386/mach-es7000/mach_wakecpu.h +++ b/include/asm-i386/mach-es7000/mach_wakecpu.h @@ -31,7 +31,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) static inline void wait_for_init_deassert(atomic_t *deassert) { #ifdef WAKE_SECONDARY_VIA_INIT - while (!atomic_read(deassert)); + while (!atomic_read(deassert)) + cpu_relax(); #endif return; } -- cgit v1.2.3 From 0328ecef9041fe477efc2404fd3aaa29e7ee0430 Mon Sep 17 00:00:00 2001 From: Daniel Gollub Date: Wed, 15 Aug 2007 02:40:35 +0200 Subject: x86_64: Fix to keep watchdog disabled by default for i386/x86_64 Fixed wrong expression which enabled watchdogs even if nmi_watchdog kernel parameter wasn't set. This regression got slightly introduced with commit b7471c6da94d30d3deadc55986cc38d1ff57f9ca. Introduced NMI_DISABLED (-1) which allows to switch the value of NMI_DEFAULT without breaking the APIC NMI watchdog code (again). Fixes: https://bugzilla.novell.com/show_bug.cgi?id=298084 http://bugzilla.kernel.org/show_bug.cgi?id=7839 And likely some more nmi_watchdog=0 related issues. 
Signed-off-by: Daniel Gollub Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 2 +- arch/i386/kernel/nmi.c | 4 ++-- arch/x86_64/kernel/nmi.c | 4 ++-- include/asm-i386/nmi.h | 3 ++- include/asm-x86_64/nmi.h | 3 ++- 5 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index f9fff29e01a9..3d67ae18d762 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1085,7 +1085,7 @@ static int __init detect_init_APIC (void) if (l & MSR_IA32_APICBASE_ENABLE) mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - if (nmi_watchdog != NMI_NONE) + if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED) nmi_watchdog = NMI_LOCAL_APIC; printk(KERN_INFO "Found and enabled local APIC!\n"); diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 99beac7f96ce..8c1c965eb2a8 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -77,7 +77,7 @@ static int __init check_nmi_watchdog(void) unsigned int *prev_nmi_count; int cpu; - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) + if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) return 0; if (!atomic_read(&nmi_active)) @@ -424,7 +424,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, if (!!old_state == !!nmi_watchdog_enabled) return 0; - if (atomic_read(&nmi_active) < 0) { + if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); return -EIO; } diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index cb8ee9d02f86..0ec6d2ddb931 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -85,7 +85,7 @@ int __init check_nmi_watchdog (void) int *counts; int cpu; - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) + if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) return 0; if (!atomic_read(&nmi_active)) 
@@ -442,7 +442,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, if (!!old_state == !!nmi_watchdog_enabled) return 0; - if (atomic_read(&nmi_active) < 0) { + if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); return -EIO; } diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index ff30c98f87b0..70a958a8e381 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -33,11 +33,12 @@ extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; -#define NMI_DEFAULT -1 +#define NMI_DISABLED -1 #define NMI_NONE 0 #define NMI_IO_APIC 1 #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +#define NMI_DEFAULT NMI_DISABLED struct ctl_table; struct file; diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index 5fb3c0de5ccc..65b6acf3bb59 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -64,11 +64,12 @@ extern int setup_nmi_watchdog(char *); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; -#define NMI_DEFAULT -1 +#define NMI_DISABLED -1 #define NMI_NONE 0 #define NMI_IO_APIC 1 #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +#define NMI_DEFAULT NMI_DISABLED struct ctl_table; struct file; -- cgit v1.2.3 From 2a67789618abb74f0f97d4836a2b937bff2f1b2d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2007 01:03:07 +0100 Subject: Fix tpyo Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/math-emu/soft-fp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/math-emu/soft-fp.h b/include/math-emu/soft-fp.h index a0721ef5c2f9..a6f873b45f98 100644 --- a/include/math-emu/soft-fp.h +++ b/include/math-emu/soft-fp.h @@ -98,7 +98,7 @@ #endif #ifndef FP_TRAPPING_EXCEPTIONS -#define FP_TRAPPING_EXCPETIONS 0 +#define FP_TRAPPING_EXCEPTIONS 0 #endif #define FP_SET_EXCEPTION(ex) \ -- 
cgit v1.2.3 From c9b0ee2c2af33c2ca722aa05bbcb604487134e4c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 31 Jul 2007 12:42:22 -0300 Subject: V4L/DVB (5968): videodev2.h: remove superfluous FBUF GLOBAL_INV_ALPHA support There is no need for a global inverted alpha capability since all the application has to do is to pass '255-alpha' as the global alpha value. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index c66c8a3410b9..ae9b24c12f6a 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -617,7 +617,6 @@ struct v4l2_framebuffer #define V4L2_FBUF_CAP_LOCAL_ALPHA 0x0010 #define V4L2_FBUF_CAP_GLOBAL_ALPHA 0x0020 #define V4L2_FBUF_CAP_LOCAL_INV_ALPHA 0x0040 -#define V4L2_FBUF_CAP_GLOBAL_INV_ALPHA 0x0080 /* Flags for the 'flags' field. */ #define V4L2_FBUF_FLAG_PRIMARY 0x0001 #define V4L2_FBUF_FLAG_OVERLAY 0x0002 @@ -625,7 +624,6 @@ struct v4l2_framebuffer #define V4L2_FBUF_FLAG_LOCAL_ALPHA 0x0008 #define V4L2_FBUF_FLAG_GLOBAL_ALPHA 0x0010 #define V4L2_FBUF_FLAG_LOCAL_INV_ALPHA 0x0020 -#define V4L2_FBUF_FLAG_GLOBAL_INV_ALPHA 0x0040 struct v4l2_clip { -- cgit v1.2.3 From 1116fae5fdfa80c6744a9b5d75fb3ef687a69b19 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 20 Aug 2007 22:42:55 +0200 Subject: ide: config_drive_for_dma() fixes * Add DMA blacklist checking (->ide_dma_on check probably can go now). * Add ->atapi_dma flag checking and remove no longer needed ns87415_ide_dma_check() from ns87415 host driver. * Remove now needless __ide_dma_check() wrapper and symbol export. * Check drive->autodma instead of hwif->autodma (there should be no changes in behavior as all users of config_drive_for_dma() set both ->autodma flags). 
Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-dma.c | 26 ++++++++++---------------- drivers/ide/pci/ns87415.c | 9 --------- include/linux/ide.h | 1 - 3 files changed, 10 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 5fe1d72ab451..865a2740a6e3 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -349,9 +349,17 @@ EXPORT_SYMBOL_GPL(ide_destroy_dmatable); static int config_drive_for_dma (ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; struct hd_driveid *id = drive->id; - if ((id->capability & 1) && drive->hwif->autodma) { + /* consult the list of known "bad" drives */ + if (__ide_dma_bad_drive(drive)) + return -1; + + if (drive->media != ide_disk && hwif->atapi_dma == 0) + return -1; + + if ((id->capability & 1) && drive->autodma) { /* * Enable DMA on any drive that has * UltraDMA (mode 0/1/2/3/4/5/6) enabled @@ -513,20 +521,6 @@ int __ide_dma_on (ide_drive_t *drive) EXPORT_SYMBOL(__ide_dma_on); -/** - * __ide_dma_check - check DMA setup - * @drive: drive to check - * - * Don't use - due for extermination - */ - -int __ide_dma_check (ide_drive_t *drive) -{ - return config_drive_for_dma(drive); -} - -EXPORT_SYMBOL(__ide_dma_check); - /** * ide_dma_setup - begin a DMA phase * @drive: target device @@ -1021,7 +1015,7 @@ void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_p if (!hwif->dma_host_on) hwif->dma_host_on = &ide_dma_host_on; if (!hwif->ide_dma_check) - hwif->ide_dma_check = &__ide_dma_check; + hwif->ide_dma_check = &config_drive_for_dma; if (!hwif->dma_setup) hwif->dma_setup = &ide_dma_setup; if (!hwif->dma_exec_cmd) diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c index 09941f37d635..465c935fdf25 100644 --- a/drivers/ide/pci/ns87415.c +++ b/drivers/ide/pci/ns87415.c @@ -187,14 +187,6 @@ static int ns87415_ide_dma_setup(ide_drive_t *drive) return 1; } -static int ns87415_ide_dma_check (ide_drive_t *drive) -{ - 
if (drive->media != ide_disk) - return -1; - - return __ide_dma_check(drive); -} - static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) { struct pci_dev *dev = hwif->pci_dev; @@ -266,7 +258,6 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) outb(0x60, hwif->dma_status); hwif->dma_setup = &ns87415_ide_dma_setup; - hwif->ide_dma_check = &ns87415_ide_dma_check; hwif->ide_dma_end = &ns87415_ide_dma_end; if (!noautodma) diff --git a/include/linux/ide.h b/include/linux/ide.h index d71d0121b7f9..7e15e0870290 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1312,7 +1312,6 @@ void ide_dma_host_off(ide_drive_t *); void ide_dma_off_quietly(ide_drive_t *); void ide_dma_host_on(ide_drive_t *); extern int __ide_dma_on(ide_drive_t *); -extern int __ide_dma_check(ide_drive_t *); extern int ide_dma_setup(ide_drive_t *); extern void ide_dma_start(ide_drive_t *); extern int __ide_dma_end(ide_drive_t *); -- cgit v1.2.3 From a5b7e70d787f528386eda025d3e38f545017f241 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 20 Aug 2007 22:42:56 +0200 Subject: ide: add cable detection for early UDMA66 devices (take 3) * Move ide_in_drive_list() from ide-dma.c to ide-iops.c. * Add ivb_list[] table for listing early UDMA66 devices which don't conform to ATA4 standard wrt cable detection (bit14 is zero, only bit13 is valid) and use only device side cable detection for them since host side cable detection may be unreliable. * Add model "QUANTUM FIREBALLlct10 05" with firmware "A03.0900" to the list (from Craig's bugreport). v2: * Improve kernel message based on a suggestion from Sergei. v3: * Don't print kernel message when no device side cable detection is done, plus some minor fixes. (Noticed by Sergei) Thanks to Craig for testing this patch.
Cc: Craig Block Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-dma.c | 19 ------------------- drivers/ide/ide-iops.c | 39 ++++++++++++++++++++++++++++++++++++--- include/linux/ide.h | 3 ++- 3 files changed, 38 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 865a2740a6e3..ff644a5e12cd 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -134,25 +134,6 @@ static const struct drive_list_entry drive_blacklist [] = { }; -/** - * ide_in_drive_list - look for drive in black/white list - * @id: drive identifier - * @drive_table: list to inspect - * - * Look for a drive in the blacklist and the whitelist tables - * Returns 1 if the drive is found in the table. - */ - -int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table) -{ - for ( ; drive_table->id_model ; drive_table++) - if ((!strcmp(drive_table->id_model, id->model)) && - (!drive_table->id_firmware || - strstr(id->fw_rev, drive_table->id_firmware))) - return 1; - return 0; -} - /** * ide_dma_intr - IDE DMA interrupt handler * @drive: the drive the interrupt is for diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 92578b6832e9..fe2a69fed72b 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -565,6 +565,34 @@ int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, u8 good, u8 b EXPORT_SYMBOL(ide_wait_stat); +/** + * ide_in_drive_list - look for drive in black/white list + * @id: drive identifier + * @drive_table: list to inspect + * + * Look for a drive in the blacklist and the whitelist tables + * Returns 1 if the drive is found in the table. 
+ */ + +int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table) +{ + for ( ; drive_table->id_model; drive_table++) + if ((!strcmp(drive_table->id_model, id->model)) && + (!drive_table->id_firmware || + strstr(id->fw_rev, drive_table->id_firmware))) + return 1; + return 0; +} + +/* + * Early UDMA66 devices don't set bit14 to 1, only bit13 is valid. + * We list them here and depend on the device side cable detection for them. + */ +static const struct drive_list_entry ivb_list[] = { + { "QUANTUM FIREBALLlct10 05" , "A03.0900" }, + { NULL , NULL } +}; + /* * All hosts that use the 80c ribbon must use! * The name is derived from upper byte of word 93 and the 80c ribbon. @@ -573,11 +601,16 @@ u8 eighty_ninty_three (ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; struct hd_driveid *id = drive->id; + int ivb = ide_in_drive_list(id, ivb_list); if (hwif->cbl == ATA_CBL_PATA40_SHORT) return 1; - if (hwif->cbl != ATA_CBL_PATA80) + if (ivb) + printk(KERN_DEBUG "%s: skipping word 93 validity check\n", + drive->name); + + if (hwif->cbl != ATA_CBL_PATA80 && !ivb) goto no_80w; /* Check for SATA but only if we are ATA5 or higher */ @@ -587,11 +620,11 @@ u8 eighty_ninty_three (ide_drive_t *drive) /* * FIXME: * - change master/slave IDENTIFY order - * - force bit13 (80c cable present) check + * - force bit13 (80c cable present) check also for !ivb devices * (unless the slave device is pre-ATA3) */ #ifndef CONFIG_IDEDMA_IVB - if (id->hw_config & 0x4000) + if ((id->hw_config & 0x4000) || (ivb && (id->hw_config & 0x2000))) #else if (id->hw_config & 0x6000) #endif diff --git a/include/linux/ide.h b/include/linux/ide.h index 7e15e0870290..c792b4fd1588 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1285,13 +1285,14 @@ void ide_init_sg_cmd(ide_drive_t *, struct request *); #define BAD_DMA_DRIVE 0 #define GOOD_DMA_DRIVE 1 -#ifdef CONFIG_BLK_DEV_IDEDMA struct drive_list_entry { const char *id_model; const char *id_firmware; }; int 
ide_in_drive_list(struct hd_driveid *, const struct drive_list_entry *); + +#ifdef CONFIG_BLK_DEV_IDEDMA int __ide_dma_bad_drive(ide_drive_t *); int __ide_dma_good_drive(ide_drive_t *); u8 ide_max_dma_mode(ide_drive_t *); -- cgit v1.2.3 From 195f7fd0a7e2b3179d52aa8ed6de3637203946c6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 18 Aug 2007 00:07:40 -0700 Subject: [SPARC64]: Need to clobber global reg vars in switch_to(). Otherwise the compiler can't see that things like the per-cpu area base register are changing. Signed-off-by: David S. Miller --- include/asm-sparc64/percpu.h | 4 ++-- include/asm-sparc64/system.h | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index caf8750792ff..a1f53a4da405 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -3,6 +3,8 @@ #include +register unsigned long __local_per_cpu_offset asm("g5"); + #ifdef CONFIG_SMP #define setup_per_cpu_areas() do { } while (0) @@ -23,8 +25,6 @@ extern unsigned long __per_cpu_shift; __typeof__(type) per_cpu__##name \ ____cacheline_aligned_in_smp -register unsigned long __local_per_cpu_offset asm("g5"); - /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset)) diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index 64891cb10f05..3f175fa7e6d2 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -141,7 +141,6 @@ do { \ * not preserve it's value. Hairy, but it lets us remove 2 loads * and 2 stores in this critical code path.
-DaveM */ -#define EXTRA_CLOBBER ,"%l1" #define switch_to(prev, next, last) \ do { if (test_thread_flag(TIF_PERFCTR)) { \ unsigned long __tmp; \ @@ -164,33 +163,34 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ "stx %%i6, [%%sp + 2047 + 0x70]\n\t" \ "stx %%i7, [%%sp + 2047 + 0x78]\n\t" \ "rdpr %%wstate, %%o5\n\t" \ - "stx %%o6, [%%g6 + %3]\n\t" \ - "stb %%o5, [%%g6 + %2]\n\t" \ - "rdpr %%cwp, %%o5\n\t" \ + "stx %%o6, [%%g6 + %6]\n\t" \ "stb %%o5, [%%g6 + %5]\n\t" \ - "mov %1, %%g6\n\t" \ - "ldub [%1 + %5], %%g1\n\t" \ + "rdpr %%cwp, %%o5\n\t" \ + "stb %%o5, [%%g6 + %8]\n\t" \ + "mov %4, %%g6\n\t" \ + "ldub [%4 + %8], %%g1\n\t" \ "wrpr %%g1, %%cwp\n\t" \ - "ldx [%%g6 + %3], %%o6\n\t" \ - "ldub [%%g6 + %2], %%o5\n\t" \ - "ldub [%%g6 + %4], %%o7\n\t" \ + "ldx [%%g6 + %6], %%o6\n\t" \ + "ldub [%%g6 + %5], %%o5\n\t" \ + "ldub [%%g6 + %7], %%o7\n\t" \ "wrpr %%o5, 0x0, %%wstate\n\t" \ "ldx [%%sp + 2047 + 0x70], %%i6\n\t" \ "ldx [%%sp + 2047 + 0x78], %%i7\n\t" \ - "ldx [%%g6 + %6], %%g4\n\t" \ + "ldx [%%g6 + %9], %%g4\n\t" \ "brz,pt %%o7, 1f\n\t" \ " mov %%g7, %0\n\t" \ "b,a ret_from_syscall\n\t" \ "1:\n\t" \ - : "=&r" (last) \ + : "=&r" (last), "=r" (current), "=r" (current_thread_info_reg), \ + "=r" (__local_per_cpu_offset) \ : "0" (task_thread_info(next)), \ "i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_NEW_CHILD), \ "i" (TI_CWP), "i" (TI_TASK) \ : "cc", \ "g1", "g2", "g3", "g7", \ - "l2", "l3", "l4", "l5", "l6", "l7", \ + "l1", "l2", "l3", "l4", "l5", "l6", "l7", \ "i0", "i1", "i2", "i3", "i4", "i5", \ - "o0", "o1", "o2", "o3", "o4", "o5", "o7" EXTRA_CLOBBER);\ + "o0", "o1", "o2", "o3", "o4", "o5", "o7"); \ /* If you fuck with this, update ret_from_syscall code too. 
*/ \ if (test_thread_flag(TIF_PERFCTR)) { \ write_pcr(current_thread_info()->pcr_reg); \ -- cgit v1.2.3 From 15f6ddc7d9cf96f2ee88897c7164198ed6e45a77 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 21 Aug 2007 19:15:31 -0500 Subject: [POWERPC] Fix PCI Device ID for MPC8544/8533 processors The initial user manuals for MPC8544/8533 had some issues with properly documenting the device IDs for MPC8544/8533. These processors are almost identical and both show up on the reference boards. Fix up the quirks for PCIe support to handle MPC8533/E. Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 2 ++ include/linux/pci_ids.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 9fb0ce5c7176..114c90f8f560 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -251,6 +251,8 @@ DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8568E, quirk_fsl_pcie_transpare DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8568, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8567E, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8567, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8533E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8533, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8544E, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8544, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8641, quirk_fsl_pcie_transparent); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 07fc57429b58..8938d59013c6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2092,8 +2092,10 @@ #define PCI_DEVICE_ID_MPC8568 0x0021 #define PCI_DEVICE_ID_MPC8567E 0x0022 #define PCI_DEVICE_ID_MPC8567 0x0023 -#define 
PCI_DEVICE_ID_MPC8544E 0x0030 -#define PCI_DEVICE_ID_MPC8544 0x0031 +#define PCI_DEVICE_ID_MPC8533E 0x0030 +#define PCI_DEVICE_ID_MPC8533 0x0031 +#define PCI_DEVICE_ID_MPC8544E 0x0032 +#define PCI_DEVICE_ID_MPC8544 0x0033 #define PCI_DEVICE_ID_MPC8641 0x7010 #define PCI_DEVICE_ID_MPC8641D 0x7011 -- cgit v1.2.3 From 0a87c5cfc0bb0c1bdcc1cc9fd82e4a1711fac512 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 22 Aug 2007 13:51:40 +0200 Subject: [S390] vmur: fix diag14 exceptions with addresses > 2GB. There are several s390 diagnose calls, which must be executed below the 2GB memory boundary. In order to enforce this, those diagnoses must be compiled into the kernel. Currently diag 14 can be called within the vmur kernel module from addresses above 2GB. This leads to specification exceptions. This patch moves diag10, diag14 and diag210 into the new diag.c file. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/diag.c | 102 +++++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/s390_ksyms.c | 1 - arch/s390/mm/cmm.c | 1 + arch/s390/mm/init.c | 17 ------- drivers/s390/block/dasd_diag.c | 1 + drivers/s390/char/raw3270.c | 1 + drivers/s390/char/vmur.c | 32 ++----------- drivers/s390/cio/device_id.c | 48 +------------------ include/asm-s390/cio.h | 15 ------ include/asm-s390/diag.h | 39 ++++++++++++++++ include/asm-s390/pgalloc.h | 2 - 12 files changed, 150 insertions(+), 111 deletions(-) create mode 100644 arch/s390/kernel/diag.c create mode 100644 include/asm-s390/diag.h (limited to 'include') diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 3195d375bd51..56cb71007cd9 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -6,7 +6,7 @@ EXTRA_AFLAGS := -traditional obj-y := bitmap.o traps.o time.o process.o base.o early.o \ setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ - semaphore.o s390_ext.o debug.o irq.o ipl.o 
dis.o + semaphore.o s390_ext.o debug.o irq.o ipl.o dis.o diag.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c new file mode 100644 index 000000000000..c032d11da8a1 --- /dev/null +++ b/arch/s390/kernel/diag.c @@ -0,0 +1,102 @@ +/* + * Implementation of s390 diagnose codes + * + * Copyright IBM Corp. 2007 + * Author(s): Michael Holzheu + */ + +#include +#include + +/* + * Diagnose 10: Release pages + */ +void diag10(unsigned long addr) +{ + if (addr >= 0x7ff00000) + return; + asm volatile( +#ifdef CONFIG_64BIT + " sam31\n" + " diag %0,%0,0x10\n" + "0: sam64\n" +#else + " diag %0,%0,0x10\n" + "0:\n" +#endif + EX_TABLE(0b, 0b) + : : "a" (addr)); +} +EXPORT_SYMBOL(diag10); + +/* + * Diagnose 14: Input spool file manipulation + */ +int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) +{ + register unsigned long _ry1 asm("2") = ry1; + register unsigned long _ry2 asm("3") = subcode; + int rc = 0; + + asm volatile( +#ifdef CONFIG_64BIT + " sam31\n" + " diag %2,2,0x14\n" + " sam64\n" +#else + " diag %2,2,0x14\n" +#endif + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "+d" (_ry2) + : "d" (rx), "d" (_ry1) + : "cc"); + + return rc; +} +EXPORT_SYMBOL(diag14); + +/* + * Diagnose 210: Get information about a virtual device + */ +int diag210(struct diag210 *addr) +{ + /* + * diag 210 needs its data below the 2GB border, so we + * use a static data area to be sure + */ + static struct diag210 diag210_tmp; + static DEFINE_SPINLOCK(diag210_lock); + unsigned long flags; + int ccode; + + spin_lock_irqsave(&diag210_lock, flags); + diag210_tmp = *addr; + +#ifdef CONFIG_64BIT + asm volatile( + " lhi %0,-1\n" + " sam31\n" + " diag %1,0,0x210\n" + "0: ipm %0\n" + " srl %0,28\n" + "1: sam64\n" + EX_TABLE(0b, 1b) + : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); +#else + asm volatile( + " lhi %0,-1\n" + " diag %1,0,0x210\n" + "0: ipm %0\n" + " srl %0,28\n" 
+ "1:\n" + EX_TABLE(0b, 1b) + : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); +#endif + + *addr = diag210_tmp; + spin_unlock_irqrestore(&diag210_lock, flags); + + return ccode; +} +EXPORT_SYMBOL(diag210); diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 90b5ef529eb7..7234c737f825 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -25,7 +25,6 @@ EXPORT_SYMBOL(_oi_bitmap); EXPORT_SYMBOL(_ni_bitmap); EXPORT_SYMBOL(_zb_findmap); EXPORT_SYMBOL(_sb_findmap); -EXPORT_SYMBOL(diag10); /* * semaphore ops diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index c5b2f4f078bc..fabc50adc46a 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -20,6 +20,7 @@ #include #include +#include static char *sender = "VMRMSVM"; module_param(sender, charp, 0400); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 9098531a2671..3a25bbf2eb0a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -42,23 +42,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); -void diag10(unsigned long addr) -{ - if (addr >= 0x7ff00000) - return; - asm volatile( -#ifdef CONFIG_64BIT - " sam31\n" - " diag %0,%0,0x10\n" - "0: sam64\n" -#else - " diag %0,%0,0x10\n" - "0:\n" -#endif - EX_TABLE(0b,0b) - : : "a" (addr)); -} - void show_mem(void) { int i, total = 0, reserved = 0; diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index eccac1c3b71b..d32c60dbdd82 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "dasd_int.h" #include "dasd_diag.h" diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 4f2f81b16cfa..2edd5fb6d3dc 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -21,6 +21,7 @@ #include #include 
#include +#include #include "raw3270.h" diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index 04b19bdc09da..2d96c958df64 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "vmur.h" @@ -379,31 +380,6 @@ static ssize_t ur_write(struct file *file, const char __user *udata, return do_write(urf->urd, udata, count, urf->dev_reclen, ppos); } -static int do_diag_14(unsigned long rx, unsigned long ry1, - unsigned long subcode) -{ - register unsigned long _ry1 asm("2") = ry1; - register unsigned long _ry2 asm("3") = subcode; - int rc = 0; - - asm volatile( -#ifdef CONFIG_64BIT - " sam31\n" - " diag %2,2,0x14\n" - " sam64\n" -#else - " diag %2,2,0x14\n" -#endif - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc), "+d" (_ry2) - : "d" (rx), "d" (_ry1) - : "cc"); - - TRACE("diag 14: subcode=0x%lx, cc=%i\n", subcode, rc); - return rc; -} - /* * diagnose code 0x14 subcode 0x0028 - position spool file to designated * record @@ -415,7 +391,7 @@ static int diag_position_to_record(int devno, int record) { int cc; - cc = do_diag_14(record, devno, 0x28); + cc = diag14(record, devno, 0x28); switch (cc) { case 0: return 0; @@ -440,7 +416,7 @@ static int diag_read_file(int devno, char *buf) { int cc; - cc = do_diag_14((unsigned long) buf, devno, 0x00); + cc = diag14((unsigned long) buf, devno, 0x00); switch (cc) { case 0: return 0; @@ -533,7 +509,7 @@ static int diag_read_next_file_info(struct file_control_block *buf, int spid) { int cc; - cc = do_diag_14((unsigned long) buf, spid, 0xfff); + cc = diag14((unsigned long) buf, spid, 0xfff); switch (cc) { case 0: return 0; diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c index 60b9347f7c92..f232832f2b22 100644 --- a/drivers/s390/cio/device_id.c +++ b/drivers/s390/cio/device_id.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "cio.h" #include "cio_debug.h" @@ -24,51 +25,6 @@ #include "device.h" #include "ioasm.h" 
-/* - * diag210 is used under VM to get information about a virtual device - */ -int -diag210(struct diag210 * addr) -{ - /* - * diag 210 needs its data below the 2GB border, so we - * use a static data area to be sure - */ - static struct diag210 diag210_tmp; - static DEFINE_SPINLOCK(diag210_lock); - unsigned long flags; - int ccode; - - spin_lock_irqsave(&diag210_lock, flags); - diag210_tmp = *addr; - -#ifdef CONFIG_64BIT - asm volatile( - " lhi %0,-1\n" - " sam31\n" - " diag %1,0,0x210\n" - "0: ipm %0\n" - " srl %0,28\n" - "1: sam64\n" - EX_TABLE(0b,1b) - : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); -#else - asm volatile( - " lhi %0,-1\n" - " diag %1,0,0x210\n" - "0: ipm %0\n" - " srl %0,28\n" - "1:\n" - EX_TABLE(0b,1b) - : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); -#endif - - *addr = diag210_tmp; - spin_unlock_irqrestore(&diag210_lock, flags); - - return ccode; -} - /* * Input : * devno - device number @@ -349,5 +305,3 @@ ccw_device_sense_id_irq(struct ccw_device *cdev, enum dev_event dev_event) break; } } - -EXPORT_SYMBOL(diag210); diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h index f738d2827582..1982fb344164 100644 --- a/include/asm-s390/cio.h +++ b/include/asm-s390/cio.h @@ -258,19 +258,6 @@ struct ciw { /* Sick revalidation of device. 
*/ #define CIO_REVALIDATE 0x0008 -struct diag210 { - __u16 vrdcdvno : 16; /* device number (input) */ - __u16 vrdclen : 16; /* data block length (input) */ - __u32 vrdcvcla : 8; /* virtual device class (output) */ - __u32 vrdcvtyp : 8; /* virtual device type (output) */ - __u32 vrdcvsta : 8; /* virtual device status (output) */ - __u32 vrdcvfla : 8; /* virtual device flags (output) */ - __u32 vrdcrccl : 8; /* real device class (output) */ - __u32 vrdccrty : 8; /* real device type (output) */ - __u32 vrdccrmd : 8; /* real device model (output) */ - __u32 vrdccrft : 8; /* real device feature (output) */ -} __attribute__ ((packed,aligned(4))); - struct ccw_dev_id { u8 ssid; u16 devno; @@ -285,8 +272,6 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } -extern int diag210(struct diag210 *addr); - extern void wait_cons_dev(void); extern void css_schedule_reprobe(void); diff --git a/include/asm-s390/diag.h b/include/asm-s390/diag.h new file mode 100644 index 000000000000..72b2e2f2d32d --- /dev/null +++ b/include/asm-s390/diag.h @@ -0,0 +1,39 @@ +/* + * s390 diagnose functions + * + * Copyright IBM Corp. 
2007 + * Author(s): Michael Holzheu + */ + +#ifndef _ASM_S390_DIAG_H +#define _ASM_S390_DIAG_H + +/* + * Diagnose 10: Release pages + */ +extern void diag10(unsigned long addr); + +/* + * Diagnose 14: Input spool file manipulation + */ +extern int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode); + +/* + * Diagnose 210: Get information about a virtual device + */ +struct diag210 { + u16 vrdcdvno; /* device number (input) */ + u16 vrdclen; /* data block length (input) */ + u8 vrdcvcla; /* virtual device class (output) */ + u8 vrdcvtyp; /* virtual device type (output) */ + u8 vrdcvsta; /* virtual device status (output) */ + u8 vrdcvfla; /* virtual device flags (output) */ + u8 vrdcrccl; /* real device class (output) */ + u8 vrdccrty; /* real device type (output) */ + u8 vrdccrmd; /* real device model (output) */ + u8 vrdccrft; /* real device feature (output) */ +} __attribute__((packed, aligned(4))); + +extern int diag210(struct diag210 *addr); + +#endif /* _ASM_S390_DIAG_H */ diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index 56c8a6c80e2e..e45d3c9a4b7e 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -19,8 +19,6 @@ #define check_pgt_cache() do {} while (0) -extern void diag10(unsigned long addr); - /* * Page allocation orders. */ -- cgit v1.2.3 From c51b9621796c31810fb66509ea1faee4597d9c03 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 22 Aug 2007 13:51:45 +0200 Subject: [S390] Change atomic_read/set to inline functions with barrier semantics. After doing some tests this seems to be the best variant for s390 and should be correct as well. 
With gcc 4.2.1 we get the following kernel image sizes using the default configuration: atomic_t type volatile, atomic_read/set defines 5311824 bytes atomic_t type int, atomic_read/set defines 5270864 bytes atomic_t type int, atomic_read/set inline asm 5279056 bytes atomic_t type int, atomic_read/set inline barrier 5270864 bytes Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/asm-s390/atomic.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h index ea486952f778..2d184655bc5d 100644 --- a/include/asm-s390/atomic.h +++ b/include/asm-s390/atomic.h @@ -67,8 +67,17 @@ typedef struct { #endif /* __GNUC__ */ -#define atomic_read(v) ((v)->counter) -#define atomic_set(v,i) (((v)->counter) = (i)) +static inline int atomic_read(const atomic_t *v) +{ + barrier(); + return v->counter; +} + +static inline void atomic_set(atomic_t *v, int i) +{ + v->counter = i; + barrier(); +} static __inline__ int atomic_add_return(int i, atomic_t * v) { @@ -182,8 +191,17 @@ typedef struct { #endif /* __GNUC__ */ -#define atomic64_read(v) ((v)->counter) -#define atomic64_set(v,i) (((v)->counter) = (i)) +static inline long long atomic64_read(const atomic64_t *v) +{ + barrier(); + return v->counter; +} + +static inline void atomic64_set(atomic64_t *v, long long i) +{ + v->counter = i; + barrier(); +} static __inline__ long long atomic64_add_return(long long i, atomic64_t * v) { -- cgit v1.2.3 From ad4c2aa6354fad5316565b1cff57f80db0e04db8 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 22 Aug 2007 14:01:18 -0700 Subject: Serial 8250: handle saving the clear-on-read bits from the LSR and MSR Reading the LSR clears the break, parity, frame error, and overrun bits in the 8250 chip, but these are not being saved in all places that read the LSR. Same goes for the MSR delta bits. 
Save the LSR bits off whenever the lsr is read so they can be handled later in the receive routine. Save the MSR bits to be handled in the modem status routine. Also, clear the stored bits and clear the interrupt registers before enabling interrupts, to avoid handling old values of the stored bits in the interrupt routines. [akpm@linux-foundation.org: clean up pre-existing code] Signed-off-by: Corey Minyard Cc: Russell King Cc: Yinghai Lu Cc: Bjorn Helgaas Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/8250.c | 85 ++++++++++++++++++++++++++++++---------------- include/linux/serial_reg.h | 1 + 2 files changed, 57 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 301313002f6b..f94109cbb46e 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -129,7 +129,16 @@ struct uart_8250_port { unsigned char mcr; unsigned char mcr_mask; /* mask of user bits */ unsigned char mcr_force; /* mask of forced bits */ - unsigned char lsr_break_flag; + + /* + * Some bits in registers are cleared on a read, so they must + * be saved whenever the register is read but the bits will not + * be immediately processed. + */ +#define LSR_SAVE_FLAGS UART_LSR_BRK_ERROR_BITS + unsigned char lsr_saved_flags; +#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA + unsigned char msr_saved_flags; /* * We provide a per-port pm hook. @@ -1238,6 +1247,7 @@ static void serial8250_start_tx(struct uart_port *port) if (up->bugs & UART_BUG_TXEN) { unsigned char lsr, iir; lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; iir = serial_in(up, UART_IIR) & 0x0f; if ((up->port.type == PORT_RM9000) ? 
(lsr & UART_LSR_THRE && @@ -1290,18 +1300,10 @@ receive_chars(struct uart_8250_port *up, unsigned int *status) flag = TTY_NORMAL; up->port.icount.rx++; -#ifdef CONFIG_SERIAL_8250_CONSOLE - /* - * Recover the break flag from console xmit - */ - if (up->port.line == up->port.cons->index) { - lsr |= up->lsr_break_flag; - up->lsr_break_flag = 0; - } -#endif + lsr |= up->lsr_saved_flags; + up->lsr_saved_flags = 0; - if (unlikely(lsr & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) { + if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) { /* * For statistics only */ @@ -1392,6 +1394,8 @@ static unsigned int check_modem_status(struct uart_8250_port *up) { unsigned int status = serial_in(up, UART_MSR); + status |= up->msr_saved_flags; + up->msr_saved_flags = 0; if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && up->port.info != NULL) { if (status & UART_MSR_TERI) @@ -1591,7 +1595,8 @@ static void serial8250_timeout(unsigned long data) static void serial8250_backup_timeout(unsigned long data) { struct uart_8250_port *up = (struct uart_8250_port *)data; - unsigned int iir, ier = 0; + unsigned int iir, ier = 0, lsr; + unsigned long flags; /* * Must disable interrupts or else we risk racing with the interrupt @@ -1610,9 +1615,13 @@ static void serial8250_backup_timeout(unsigned long data) * the "Diva" UART used on the management processor on many HP * ia64 and parisc boxes. 
*/ + spin_lock_irqsave(&up->port.lock, flags); + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + spin_unlock_irqrestore(&up->port.lock, flags); if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && (!uart_circ_empty(&up->port.info->xmit) || up->port.x_char) && - (serial_in(up, UART_LSR) & UART_LSR_THRE)) { + (lsr & UART_LSR_THRE)) { iir &= ~(UART_IIR_ID | UART_IIR_NO_INT); iir |= UART_IIR_THRI; } @@ -1631,13 +1640,14 @@ static unsigned int serial8250_tx_empty(struct uart_port *port) { struct uart_8250_port *up = (struct uart_8250_port *)port; unsigned long flags; - unsigned int ret; + unsigned int lsr; spin_lock_irqsave(&up->port.lock, flags); - ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? TIOCSER_TEMT : 0; + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; spin_unlock_irqrestore(&up->port.lock, flags); - return ret; + return lsr & UART_LSR_TEMT ? TIOCSER_TEMT : 0; } static unsigned int serial8250_get_mctrl(struct uart_port *port) @@ -1708,8 +1718,7 @@ static inline void wait_for_xmitr(struct uart_8250_port *up, int bits) do { status = serial_in(up, UART_LSR); - if (status & UART_LSR_BI) - up->lsr_break_flag = UART_LSR_BI; + up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; if (--tmout == 0) break; @@ -1718,8 +1727,12 @@ static inline void wait_for_xmitr(struct uart_8250_port *up, int bits) /* Wait up to 1s for flow control if necessary */ if (up->port.flags & UPF_CONS_FLOW) { - tmout = 1000000; - while (!(serial_in(up, UART_MSR) & UART_MSR_CTS) && --tmout) { + unsigned int tmout; + for (tmout = 1000000; tmout; tmout--) { + unsigned int msr = serial_in(up, UART_MSR); + up->msr_saved_flags |= msr & MSR_SAVE_FLAGS; + if (msr & UART_MSR_CTS) + break; udelay(1); touch_nmi_watchdog(); } @@ -1888,6 +1901,18 @@ static int serial8250_startup(struct uart_port *port) spin_unlock_irqrestore(&up->port.lock, flags); + /* + * Clear the interrupt registers again for luck, and clear the + * saved flags to avoid 
getting false values from polling + * routines or the previous session. + */ + serial_inp(up, UART_LSR); + serial_inp(up, UART_RX); + serial_inp(up, UART_IIR); + serial_inp(up, UART_MSR); + up->lsr_saved_flags = 0; + up->msr_saved_flags = 0; + /* * Finally, enable interrupts. Note: Modem status interrupts * are set via set_termios(), which will be occurring imminently @@ -1906,14 +1931,6 @@ static int serial8250_startup(struct uart_port *port) (void) inb_p(icp); } - /* - * And clear the interrupt registers again for luck. - */ - (void) serial_inp(up, UART_LSR); - (void) serial_inp(up, UART_RX); - (void) serial_inp(up, UART_IIR); - (void) serial_inp(up, UART_MSR); - return 0; } @@ -2484,6 +2501,16 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count) wait_for_xmitr(up, BOTH_EMPTY); serial_out(up, UART_IER, ier); + /* + * The receive handling will happen properly because the + * receive ready bit will still be set; it is not cleared + * on read. However, modem control will not, we must + * call it if we have saved something in the saved flags + * while processing with interrupts off. 
+ */ + if (up->msr_saved_flags) + check_modem_status(up); + if (locked) spin_unlock(&up->port.lock); local_irq_restore(flags); diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 1c5ed7d92b0f..96c0d93fc2ca 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -118,6 +118,7 @@ #define UART_LSR_PE 0x04 /* Parity error indicator */ #define UART_LSR_OE 0x02 /* Overrun error indicator */ #define UART_LSR_DR 0x01 /* Receiver data ready */ +#define UART_LSR_BRK_ERROR_BITS 0x1E /* BI, FE, PE, OE bits */ #define UART_MSR 6 /* In: Modem Status Register */ #define UART_MSR_DCD 0x80 /* Data Carrier Detect */ -- cgit v1.2.3 From 34b4e4aa3c470ce8fa2bd78abb1741b4b58baad7 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 22 Aug 2007 14:01:28 -0700 Subject: fix NULL pointer dereference in __vm_enough_memory() The new exec code inserts an accounted vma into an mm struct which is not current->mm. The existing memory check code has a hard coded assumption that this does not happen as does the security code. As the correct mm is known we pass the mm to the security method and the helper function. A new security test is added for the case where we need to pass the mm and the existing one is modified to pass current->mm to avoid the need to change large amounts of code. 
(Thanks to Tobias for fixing rejects and testing) Signed-off-by: Alan Cox Cc: WU Fengguang Cc: James Morris Cc: Tobias Diedrich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- include/linux/security.h | 20 +++++++++++++++----- mm/mmap.c | 6 +++--- mm/nommu.c | 2 +- security/commoncap.c | 4 ++-- security/dummy.c | 4 ++-- security/selinux/hooks.c | 4 ++-- 7 files changed, 26 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 655094dc9440..1692dd6cb915 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1042,7 +1042,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma, } /* mmap.c */ -extern int __vm_enough_memory(long pages, int cap_sys_admin); +extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); extern struct vm_area_struct *vma_merge(struct mm_struct *, diff --git a/include/linux/security.h b/include/linux/security.h index c11dc8aa0351..1a15526e9f67 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -54,7 +54,7 @@ extern int cap_inode_removexattr(struct dentry *dentry, char *name); extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); extern int cap_syslog (int type); -extern int cap_vm_enough_memory (long pages); +extern int cap_vm_enough_memory (struct mm_struct *mm, long pages); struct msghdr; struct sk_buff; @@ -1125,6 +1125,7 @@ struct request_sock; * Return 0 if permission is granted. * @vm_enough_memory: * Check permissions for allocating a new virtual mapping. + * @mm contains the mm struct it is being added to. * @pages contains the number of pages. * Return 0 if permission is granted. 
* @@ -1169,7 +1170,7 @@ struct security_operations { int (*quota_on) (struct dentry * dentry); int (*syslog) (int type); int (*settime) (struct timespec *ts, struct timezone *tz); - int (*vm_enough_memory) (long pages); + int (*vm_enough_memory) (struct mm_struct *mm, long pages); int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); @@ -1469,10 +1470,14 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) return security_ops->settime(ts, tz); } - static inline int security_vm_enough_memory(long pages) { - return security_ops->vm_enough_memory(pages); + return security_ops->vm_enough_memory(current->mm, pages); +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +{ + return security_ops->vm_enough_memory(mm, pages); } static inline int security_bprm_alloc (struct linux_binprm *bprm) @@ -2219,7 +2224,12 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) static inline int security_vm_enough_memory(long pages) { - return cap_vm_enough_memory(pages); + return cap_vm_enough_memory(current->mm, pages); +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +{ + return cap_vm_enough_memory(mm, pages); } static inline int security_bprm_alloc (struct linux_binprm *bprm) diff --git a/mm/mmap.c b/mm/mmap.c index b6537211b9cc..0d40e66c841b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -93,7 +93,7 @@ atomic_t vm_committed_space = ATOMIC_INIT(0); * Note this is a helper function intended to be used by LSMs which * wish to use this logic. 
*/ -int __vm_enough_memory(long pages, int cap_sys_admin) +int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { unsigned long free, allowed; @@ -166,7 +166,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) /* Don't let a single process grow too big: leave 3% of the size of this process for other processes */ - allowed -= current->mm->total_vm / 32; + allowed -= mm->total_vm / 32; /* * cast `allowed' as a signed long because vm_committed_space @@ -2077,7 +2077,7 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) if (__vma && __vma->vm_start < vma->vm_end) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && - security_vm_enough_memory(vma_pages(vma))) + security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; vma_link(mm, vma, prev, rb_link, rb_parent); return 0; diff --git a/mm/nommu.c b/mm/nommu.c index 9eef6a398555..8ed0cb43118a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1270,7 +1270,7 @@ EXPORT_SYMBOL(get_unmapped_area); * Note this is a helper function intended to be used by LSMs which * wish to use this logic. 
*/ -int __vm_enough_memory(long pages, int cap_sys_admin) +int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { unsigned long free, allowed; diff --git a/security/commoncap.c b/security/commoncap.c index 338606eb7238..7520361663e8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -315,13 +315,13 @@ int cap_syslog (int type) return 0; } -int cap_vm_enough_memory(long pages) +int cap_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; if (cap_capable(current, CAP_SYS_ADMIN) == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } EXPORT_SYMBOL(cap_capable); diff --git a/security/dummy.c b/security/dummy.c index 19d813d5e083..853ec2292798 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -108,13 +108,13 @@ static int dummy_settime(struct timespec *ts, struct timezone *tz) return 0; } -static int dummy_vm_enough_memory(long pages) +static int dummy_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; if (dummy_capable(current, CAP_SYS_ADMIN) == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } static int dummy_bprm_alloc_security (struct linux_binprm *bprm) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6237933f7d82..d8bc4172819c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1584,7 +1584,7 @@ static int selinux_syslog(int type) * Do not audit the selinux permission check, as this is applied to all * processes that allocate mappings. 
*/ -static int selinux_vm_enough_memory(long pages) +static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) { int rc, cap_sys_admin = 0; struct task_security_struct *tsec = current->security; @@ -1600,7 +1600,7 @@ static int selinux_vm_enough_memory(long pages) if (rc == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } /* binprm security operations */ -- cgit v1.2.3 From 4f855897fe6acd98f10e939dea41130cec7b9c42 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Aug 2007 14:01:29 -0700 Subject: m68k: <asm/page.h> needs <linux/compiler.h> m68k: <asm/page.h> needs <linux/compiler.h> because of __attribute_const__ Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-m68k/page.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h index 9e6d0d6debdb..fff1f455bdad 100644 --- a/include/asm-m68k/page.h +++ b/include/asm-m68k/page.h @@ -27,6 +27,8 @@ #ifndef __ASSEMBLY__ +#include + #include #define get_user_page(vaddr) __get_free_page(GFP_KERNEL) -- cgit v1.2.3 From 42b359238ebd359ed443fd80023e58e47be2224a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Aug 2007 14:01:32 -0700 Subject: m68k: Use _AC() instead of #ifdef __ASSEMBLY__ m68k: Use _AC() instead of #ifdef __ASSEMBLY__ hackery when needed, remove hackery when unused.
Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-m68k/page.h | 8 +++----- include/asm-m68k/processor.h | 4 ---- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h index fff1f455bdad..1431ea0b59e0 100644 --- a/include/asm-m68k/page.h +++ b/include/asm-m68k/page.h @@ -4,17 +4,15 @@ #ifdef __KERNEL__ +#include + /* PAGE_SHIFT determines the page size */ #ifndef CONFIG_SUN3 #define PAGE_SHIFT (12) #else #define PAGE_SHIFT (13) #endif -#ifdef __ASSEMBLY__ -#define PAGE_SIZE (1 << PAGE_SHIFT) -#else -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#endif +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #include diff --git a/include/asm-m68k/processor.h b/include/asm-m68k/processor.h index 8455f778b601..4453ec379c5d 100644 --- a/include/asm-m68k/processor.h +++ b/include/asm-m68k/processor.h @@ -38,12 +38,8 @@ static inline void wrusp(unsigned long usp) #ifndef CONFIG_SUN3 #define TASK_SIZE (0xF0000000UL) #else -#ifdef __ASSEMBLY__ -#define TASK_SIZE (0x0E000000) -#else #define TASK_SIZE (0x0E000000UL) #endif -#endif /* This decides where the kernel will search for a free chunk of vm * space during mmap's. -- cgit v1.2.3 From a5f442b2a90679f0b3bb562c01d0042d44ffd9fe Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 22 Aug 2007 14:01:32 -0700 Subject: m68k: Enable arbitrary speed tty support Add the needed constants and defines to activate the existing code.
Signed-off-by: Alan Cox Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-m68k/ioctls.h | 4 ++++ include/asm-m68k/termbits.h | 5 ++++- include/asm-m68k/termios.h | 6 ++++-- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-m68k/ioctls.h b/include/asm-m68k/ioctls.h index 0c48929ab444..b8d2f4be7fd7 100644 --- a/include/asm-m68k/ioctls.h +++ b/include/asm-m68k/ioctls.h @@ -46,6 +46,10 @@ #define TIOCSBRK 0x5427 /* BSD compatibility */ #define TIOCCBRK 0x5428 /* BSD compatibility */ #define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TCGETS2 _IOR('T',0x2A, struct termios2) +#define TCSETS2 _IOW('T',0x2B, struct termios2) +#define TCSETSW2 _IOW('T',0x2C, struct termios2) +#define TCSETSF2 _IOW('T',0x2D, struct termios2) #define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ #define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ diff --git a/include/asm-m68k/termbits.h b/include/asm-m68k/termbits.h index 0e520f328f53..8c14170996bb 100644 --- a/include/asm-m68k/termbits.h +++ b/include/asm-m68k/termbits.h @@ -141,6 +141,7 @@ struct ktermios { #define HUPCL 0002000 #define CLOCAL 0004000 #define CBAUDEX 0010000 +#define BOTHER 0010000 #define B57600 0010001 #define B115200 0010002 #define B230400 0010003 @@ -156,10 +157,12 @@ struct ktermios { #define B3000000 0010015 #define B3500000 0010016 #define B4000000 0010017 -#define CIBAUD 002003600000 /* input baud rate (not used) */ +#define CIBAUD 002003600000 /* input baud rate */ #define CMSPAR 010000000000 /* mark or space (stick) parity */ #define CRTSCTS 020000000000 /* flow control */ +#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ + /* c_lflag bits */ #define ISIG 0000001 #define ICANON 0000002 diff --git a/include/asm-m68k/termios.h b/include/asm-m68k/termios.h index 00edabd76168..0823032e4045 100644 --- a/include/asm-m68k/termios.h +++ 
b/include/asm-m68k/termios.h @@ -82,8 +82,10 @@ struct termio { copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ }) -#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) -#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) +#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) +#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) #endif /* __KERNEL__ */ -- cgit v1.2.3 From 2301060e2b19aa4830060524ef66abdf32b26a26 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Aug 2007 14:01:35 -0700 Subject: m68k/mac: Make mac_hid_mouse_emulate_buttons() declaration visible m68k/mac: Make mac_hid_mouse_emulate_buttons() declaration visible drivers/char/keyboard.c: In function 'kbd_keycode': drivers/char/keyboard.c:1142: error: implicit declaration of function 'mac_hid_mouse_emulate_buttons' The forward declaration of mac_hid_mouse_emulate_buttons() is not visible on m68k because it's hidden in the middle of a big #ifdef block. Move it to <linux/kbd_kern.h>, correct the type of the second parameter, and include <linux/kbd_kern.h> where needed.
Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/keyboard.c | 4 ---- drivers/macintosh/mac_hid.c | 1 + include/linux/kbd_kern.h | 3 +++ 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index 2ce0af1bd588..d95f316afb5a 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -1022,10 +1022,6 @@ static const unsigned short x86_keycodes[256] = 308,310,313,314,315,317,318,319,320,357,322,323,324,325,276,330, 332,340,365,342,343,344,345,346,356,270,341,368,369,370,371,372 }; -#ifdef CONFIG_MAC_EMUMOUSEBTN -extern int mac_hid_mouse_emulate_buttons(int, int, int); -#endif /* CONFIG_MAC_EMUMOUSEBTN */ - #ifdef CONFIG_SPARC static int sparc_l1_a_state = 0; extern void sun_do_break(void); diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c index 76c1e8e4a487..33dee3a773ed 100644 --- a/drivers/macintosh/mac_hid.c +++ b/drivers/macintosh/mac_hid.c @@ -13,6 +13,7 @@ #include #include #include +#include static struct input_dev *emumousebtn; diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index 506ad20c18f8..8bdb16bfe5fb 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -161,4 +161,7 @@ static inline void con_schedule_flip(struct tty_struct *t) schedule_delayed_work(&t->buf.work, 0); } +/* mac_hid.c */ +extern int mac_hid_mouse_emulate_buttons(int, unsigned int, int); + #endif -- cgit v1.2.3 From 0aa42632d3a0024700b25f57fd0fca56f6abad24 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 22 Aug 2007 14:02:01 -0700 Subject: selection.h: add tty_struct forward declaration In file included from drivers/video/console/newport_con.c:16: include/linux/selection.h:16: warning: "struct tty_struct" declared inside parameter list include/linux/selection.h:16: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: 
Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/selection.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/selection.h b/include/linux/selection.h index ed3408b400f1..f9457861937c 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -10,6 +10,8 @@ #include #include +struct tty_struct; + extern struct vc_data *sel_cons; extern void clear_selection(void); -- cgit v1.2.3 From b377fd3982ad957c796758a90e2988401a884241 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 22 Aug 2007 14:02:05 -0700 Subject: Apply memory policies to top two highest zones when highest zone is ZONE_MOVABLE The NUMA layer only supports NUMA policies for the highest zone. When ZONE_MOVABLE is configured with kernelcore=, the highest zone becomes ZONE_MOVABLE. The result is that policies are only applied to allocations like anonymous pages and page cache allocated from ZONE_MOVABLE when the zone is used. This patch applies policies to the two highest zones when the highest zone is ZONE_MOVABLE. As ZONE_MOVABLE consists of pages from the highest "real" zone, it's always functionally equivalent. The patch has been tested on a variety of machines both NUMA and non-NUMA covering x86, x86_64 and ppc64. No abnormal results were seen in kernbench, tbench, dbench or hackbench. It passes regression tests from the numactl package with and without kernelcore= once numactl tests are patched to wait for vmstat counters to update. akpm: this is the nasty hack to fix NUMA mempolicies in the presence of ZONE_MOVABLE and kernelcore= in 2.6.23. Christoph says "For .24 either merge the mobility or get the other solution that Mel is working on. That solution would only use a single zonelist per node and filter on the fly. That may help performance and also help to make memory policies work better."
Signed-off-by: Mel Gorman Acked-by: Lee Schermerhorn Tested-by: Lee Schermerhorn Acked-by: Christoph Lameter Cc: Andi Kleen Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- include/linux/mmzone.h | 18 ++++++++++++++++++ mm/mempolicy.c | 2 +- mm/page_alloc.c | 13 +++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index e147cf50529f..5bdd656e88cf 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -166,7 +166,7 @@ extern enum zone_type policy_zone; static inline void check_highest_zone(enum zone_type k) { - if (k > policy_zone) + if (k > policy_zone && k != ZONE_MOVABLE) policy_zone = k; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3ea68cd3b61f..4e5627379b09 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -410,6 +410,24 @@ struct zonelist { #endif }; +#ifdef CONFIG_NUMA +/* + * Only custom zonelists like MPOL_BIND need to be filtered as part of + * policies. As described in the comment for struct zonelist_cache, these + * zonelists will not have a zlcache so zlcache_ptr will not be set. Use + * that to determine if the zonelists needs to be filtered or not. + */ +static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) +{ + return !zonelist->zlcache_ptr; +} +#else +static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) +{ + return 0; +} +#endif /* CONFIG_NUMA */ + #ifdef CONFIG_ARCH_POPULATES_NODE_MAP struct node_active_region { unsigned long start_pfn; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 71b84b45154a..172abffeb2e3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) lower zones etc. Avoid empty zones because the memory allocator doesn't like them. If you implement node hot removal you have to fix that. 
*/ - k = policy_zone; + k = MAX_NR_ZONES - 1; while (1) { for_each_node_mask(nd, *nodes) { struct zone *z = &NODE_DATA(nd)->node_zones[k]; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3da85b81dabb..6427653023aa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ int zlc_active = 0; /* set if using zonelist_cache */ int did_zlc_setup = 0; /* just call zlc_setup() one time */ + enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */ zonelist_scan: /* @@ -1166,6 +1167,18 @@ zonelist_scan: z = zonelist->zones; do { + /* + * In NUMA, this could be a policy zonelist which contains + * zones that may not be allowed by the current gfp_mask. + * Check the zone is allowed by the current flags + */ + if (unlikely(alloc_should_filter_zonelist(zonelist))) { + if (highest_zoneidx == -1) + highest_zoneidx = gfp_zone(gfp_mask); + if (zone_idx(*z) > highest_zoneidx) + continue; + } + if (NUMA_BUILD && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; -- cgit v1.2.3